In message: [PATCH linux-yocto v6.1] net: dev: Reset dev.c to match
v6.1/standard/base branch
on 10/02/2026 Kevin Hao wrote:
> The merge commit 043f84176868 ("Merge branch 'v6.1/standard/base'
> into v6.1/standard/cn-sdkv5.15/octeon") incorrectly updated
> net/core/dev.c to the version from the v6.6/standard/preempt-rt/base
> branch. This change restores the file to its original state as found
> in the v6.1/standard/base branch.
>
> Signed-off-by: Kevin Hao <[email protected]>
> ---
> Hi Bruce,
>
> Please merge this into the following branch:
> v6.1/standard/cn-sdkv5.15/octeon
Merged.
My scripts must have picked up a leftover conflict-resolution file
that hadn't been removed, and copied it over the 6.1 branch as a
fixup.
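For reference, a file-level reset like this can be reproduced with a
plain checkout of the path from the base branch, roughly as below
(branch name taken from your patch description; the exact checkout
layout is an assumption):

  # take net/core/dev.c exactly as it exists on v6.1/standard/base
  git checkout v6.1/standard/base -- net/core/dev.c
  # confirm the file no longer differs from that branch
  git diff v6.1/standard/base -- net/core/dev.c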
Cheers,
Bruce
> ---
> net/core/dev.c | 944 ++++++++++++++++++++++++---------------------------
> 1 file changed, 403 insertions(+), 541 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 5b53496b2666fa710bb75e1d5ef403e15f1dd974..69bb7ac73d047aa4428b4d4eaa67381850cf2b2d 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -69,7 +69,7 @@
> */
>
> #include <linux/uaccess.h>
> -#include <linux/bitmap.h>
> +#include <linux/bitops.h>
> #include <linux/capability.h>
> #include <linux/cpu.h>
> #include <linux/types.h>
> @@ -107,7 +107,6 @@
> #include <net/pkt_cls.h>
> #include <net/checksum.h>
> #include <net/xfrm.h>
> -#include <net/tcx.h>
> #include <linux/highmem.h>
> #include <linux/init.h>
> #include <linux/module.h>
> @@ -133,7 +132,6 @@
> #include <trace/events/net.h>
> #include <trace/events/skb.h>
> #include <trace/events/qdisc.h>
> -#include <trace/events/xdp.h>
> #include <linux/inetdevice.h>
> #include <linux/cpu_rmap.h>
> #include <linux/static_key.h>
> @@ -152,16 +150,18 @@
> #include <linux/pm_runtime.h>
> #include <linux/prandom.h>
> #include <linux/once_lite.h>
> -#include <net/netdev_rx_queue.h>
>
> #include "dev.h"
> #include "net-sysfs.h"
>
> +
> static DEFINE_SPINLOCK(ptype_lock);
> struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
> struct list_head ptype_all __read_mostly; /* Taps */
>
> static int netif_rx_internal(struct sk_buff *skb);
> +static int call_netdevice_notifiers_info(unsigned long val,
> + struct netdev_notifier_info *info);
> static int call_netdevice_notifiers_extack(unsigned long val,
> struct net_device *dev,
> struct netlink_ext_ack *extack);
> @@ -396,9 +396,6 @@ static void list_netdevice(struct net_device *dev)
> netdev_for_each_altname(dev, name_node)
> netdev_name_node_add(net, name_node);
>
> - /* We reserved the ifindex, this can't fail */
> - WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
> -
> dev_base_seq_inc(net);
> }
>
> @@ -408,12 +405,9 @@ static void list_netdevice(struct net_device *dev)
> static void unlist_netdevice(struct net_device *dev, bool lock)
> {
> struct netdev_name_node *name_node;
> - struct net *net = dev_net(dev);
>
> ASSERT_RTNL();
>
> - xa_erase(&net->dev_by_index, dev->ifindex);
> -
> netdev_for_each_altname(dev, name_node)
> netdev_name_node_del(name_node);
>
> @@ -789,7 +783,18 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
> }
> EXPORT_SYMBOL(dev_get_by_name_rcu);
>
> -/* Deprecated for new users, call netdev_get_by_name() instead */
> +/**
> + * dev_get_by_name - find a device by its name
> + * @net: the applicable net namespace
> + * @name: name to find
> + *
> + * Find an interface by name. This can be called from any
> + * context and does its own locking. The returned handle has
> + * the usage count incremented and the caller must use dev_put() to
> + * release it when it is no longer needed. %NULL is returned if no
> + * matching device is found.
> + */
> +
> struct net_device *dev_get_by_name(struct net *net, const char *name)
> {
> struct net_device *dev;
> @@ -802,31 +807,6 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
> }
> EXPORT_SYMBOL(dev_get_by_name);
>
> -/**
> - * netdev_get_by_name() - find a device by its name
> - * @net: the applicable net namespace
> - * @name: name to find
> - * @tracker: tracking object for the acquired reference
> - * @gfp: allocation flags for the tracker
> - *
> - * Find an interface by name. This can be called from any
> - * context and does its own locking. The returned handle has
> - * the usage count incremented and the caller must use netdev_put() to
> - * release it when it is no longer needed. %NULL is returned if no
> - * matching device is found.
> - */
> -struct net_device *netdev_get_by_name(struct net *net, const char *name,
> - netdevice_tracker *tracker, gfp_t gfp)
> -{
> - struct net_device *dev;
> -
> - dev = dev_get_by_name(net, name);
> - if (dev)
> - netdev_tracker_alloc(dev, tracker, gfp);
> - return dev;
> -}
> -EXPORT_SYMBOL(netdev_get_by_name);
> -
> /**
> * __dev_get_by_index - find a device by its ifindex
> * @net: the applicable net namespace
> @@ -876,7 +856,18 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
> }
> EXPORT_SYMBOL(dev_get_by_index_rcu);
>
> -/* Deprecated for new users, call netdev_get_by_index() instead */
> +
> +/**
> + * dev_get_by_index - find a device by its ifindex
> + * @net: the applicable net namespace
> + * @ifindex: index of device
> + *
> + * Search for an interface by index. Returns NULL if the device
> + * is not found or a pointer to the device. The device returned has
> + * had a reference added and the pointer is safe until the user calls
> + * dev_put to indicate they have finished with it.
> + */
> +
> struct net_device *dev_get_by_index(struct net *net, int ifindex)
> {
> struct net_device *dev;
> @@ -889,30 +880,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
> }
> EXPORT_SYMBOL(dev_get_by_index);
>
> -/**
> - * netdev_get_by_index() - find a device by its ifindex
> - * @net: the applicable net namespace
> - * @ifindex: index of device
> - * @tracker: tracking object for the acquired reference
> - * @gfp: allocation flags for the tracker
> - *
> - * Search for an interface by index. Returns NULL if the device
> - * is not found or a pointer to the device. The device returned has
> - * had a reference added and the pointer is safe until the user calls
> - * netdev_put() to indicate they have finished with it.
> - */
> -struct net_device *netdev_get_by_index(struct net *net, int ifindex,
> - netdevice_tracker *tracker, gfp_t gfp)
> -{
> - struct net_device *dev;
> -
> - dev = dev_get_by_index(net, ifindex);
> - if (dev)
> - netdev_tracker_alloc(dev, tracker, gfp);
> - return dev;
> -}
> -EXPORT_SYMBOL(netdev_get_by_index);
> -
> /**
> * dev_get_by_napi_id - find a device by napi_id
> * @napi_id: ID of the NAPI struct
> @@ -1134,7 +1101,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
> return -EINVAL;
>
> /* Use one page as a bit array of possible slots */
> - inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
> + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
> if (!inuse)
> return -ENOMEM;
>
> @@ -1164,7 +1131,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
> }
>
> i = find_first_zero_bit(inuse, max_netdevices);
> - bitmap_free(inuse);
> + free_page((unsigned long) inuse);
> }
>
> snprintf(buf, IFNAMSIZ, name, i);
> @@ -1265,6 +1232,22 @@ int dev_change_name(struct net_device *dev, const char *newname)
>
> net = dev_net(dev);
>
> + /* Some auto-enslaved devices e.g. failover slaves are
> + * special, as userspace might rename the device after
> + * the interface had been brought up and running since
> + * the point kernel initiated auto-enslavement. Allow
> + * live name change even when these slave devices are
> + * up and running.
> + *
> + * Typically, users of these auto-enslaving devices
> + * don't actually care about slave name change, as
> + * they are supposed to operate on master interface
> + * directly.
> + */
> + if (dev->flags & IFF_UP &&
> + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
> + return -EBUSY;
> +
> down_write(&devnet_rename_sem);
>
> if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
> @@ -1281,8 +1264,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
> }
>
> if (oldname[0] && !strchr(oldname, '%'))
> - netdev_info(dev, "renamed from %s%s\n", oldname,
> - dev->flags & IFF_UP ? " (while UP)" : "");
> + netdev_info(dev, "renamed from %s\n", oldname);
>
> old_assign_type = dev->name_assign_type;
> dev->name_assign_type = NET_NAME_RENAMED;
> @@ -1420,7 +1402,7 @@ void netdev_state_change(struct net_device *dev)
>
> call_netdevice_notifiers_info(NETDEV_CHANGE,
> &change_info.info);
> - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
> }
> }
> EXPORT_SYMBOL(netdev_state_change);
> @@ -1556,7 +1538,7 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
> if (ret < 0)
> return ret;
>
> - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
> call_netdevice_notifiers(NETDEV_UP, dev);
>
> return ret;
> @@ -1628,7 +1610,7 @@ void dev_close_many(struct list_head *head, bool unlink)
> __dev_close_many(head);
>
> list_for_each_entry_safe(dev, tmp, head, close_list) {
> - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
> call_netdevice_notifiers(NETDEV_DOWN, dev);
> if (unlink)
> list_del_init(&dev->close_list);
> @@ -1708,15 +1690,14 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
> N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
> N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
> N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
> - N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN)
> - N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA)
> - N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE)
> - N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
> + N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
> + N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
> + N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
> + N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
> N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
> N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
> N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
> N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
> - N(XDP_FEAT_CHANGE)
> }
> #undef N
> return "UNKNOWN_NETDEV_EVENT";
> @@ -1943,7 +1924,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net);
> * @nb: notifier
> *
> * Unregister a notifier previously registered by
> - * register_netdevice_notifier_net(). The notifier is unlinked from the
> + * register_netdevice_notifier(). The notifier is unlinked into the
> * kernel structures and may then be reused. A negative errno code
> * is returned on a failure.
> *
> @@ -1964,14 +1945,6 @@ int unregister_netdevice_notifier_net(struct net *net,
> }
> EXPORT_SYMBOL(unregister_netdevice_notifier_net);
>
> -static void __move_netdevice_notifier_net(struct net *src_net,
> - struct net *dst_net,
> - struct notifier_block *nb)
> -{
> - __unregister_netdevice_notifier_net(src_net, nb);
> - __register_netdevice_notifier_net(dst_net, nb, true);
> -}
> -
> int register_netdevice_notifier_dev_net(struct net_device *dev,
> struct notifier_block *nb,
> struct netdev_net_notifier *nn)
> @@ -2008,8 +1981,10 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev,
> {
> struct netdev_net_notifier *nn;
>
> - list_for_each_entry(nn, &dev->net_notifier_list, list)
> - __move_netdevice_notifier_net(dev_net(dev), net, nn->nb);
> + list_for_each_entry(nn, &dev->net_notifier_list, list) {
> + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
> + __register_netdevice_notifier_net(net, nn->nb, true);
> + }
> }
>
> /**
> @@ -2021,8 +1996,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev,
> * are as for raw_notifier_call_chain().
> */
>
> -int call_netdevice_notifiers_info(unsigned long val,
> - struct netdev_notifier_info *info)
> +static int call_netdevice_notifiers_info(unsigned long val,
> + struct netdev_notifier_info *info)
> {
> struct net *net = dev_net(info->dev);
> int ret;
> @@ -2168,10 +2143,13 @@ static DECLARE_WORK(netstamp_work, netstamp_clear);
> void net_enable_timestamp(void)
> {
> #ifdef CONFIG_JUMP_LABEL
> - int wanted = atomic_read(&netstamp_wanted);
> + int wanted;
>
> - while (wanted > 0) {
> - if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted + 1))
> + while (1) {
> + wanted = atomic_read(&netstamp_wanted);
> + if (wanted <= 0)
> + break;
> + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
> return;
> }
> atomic_inc(&netstamp_needed_deferred);
> @@ -2185,10 +2163,13 @@ EXPORT_SYMBOL(net_enable_timestamp);
> void net_disable_timestamp(void)
> {
> #ifdef CONFIG_JUMP_LABEL
> - int wanted = atomic_read(&netstamp_wanted);
> + int wanted;
>
> - while (wanted > 1) {
> - if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted - 1))
> + while (1) {
> + wanted = atomic_read(&netstamp_wanted);
> + if (wanted <= 1)
> + break;
> + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
> return;
> }
> atomic_dec(&netstamp_needed_deferred);
> @@ -2202,7 +2183,7 @@ EXPORT_SYMBOL(net_disable_timestamp);
> static inline void net_timestamp_set(struct sk_buff *skb)
> {
> skb->tstamp = 0;
> - skb->tstamp_type = SKB_CLOCK_REALTIME;
> + skb->mono_delivery_time = 0;
> if (static_branch_unlikely(&netstamp_needed_key))
> skb->tstamp = ktime_get_real();
> }
> @@ -2461,7 +2442,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
> struct xps_map *map = NULL;
> int pos;
>
> - map = xmap_dereference(dev_maps->attr_map[tci]);
> + if (dev_maps)
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> if (!map)
> return false;
>
> @@ -3097,8 +3079,6 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
> dev->tso_max_size = min(GSO_MAX_SIZE, size);
> if (size < READ_ONCE(dev->gso_max_size))
> netif_set_gso_max_size(dev, size);
> - if (size < READ_ONCE(dev->gso_ipv4_max_size))
> - netif_set_gso_ipv4_max_size(dev, size);
> }
> EXPORT_SYMBOL(netif_set_tso_max_size);
>
> @@ -3178,7 +3158,7 @@ void __netif_schedule(struct Qdisc *q)
> EXPORT_SYMBOL(__netif_schedule);
>
> struct dev_kfree_skb_cb {
> - enum skb_drop_reason reason;
> + enum skb_free_reason reason;
> };
>
> static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
> @@ -3211,7 +3191,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue)
> }
> EXPORT_SYMBOL(netif_tx_wake_queue);
>
> -void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
> +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
> {
> unsigned long flags;
>
> @@ -3231,16 +3211,18 @@ void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
> raise_softirq_irqoff(NET_TX_SOFTIRQ);
> local_irq_restore(flags);
> }
> -EXPORT_SYMBOL(dev_kfree_skb_irq_reason);
> +EXPORT_SYMBOL(__dev_kfree_skb_irq);
>
> -void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason)
> +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
> {
> if (in_hardirq() || irqs_disabled())
> - dev_kfree_skb_irq_reason(skb, reason);
> + __dev_kfree_skb_irq(skb, reason);
> + else if (unlikely(reason == SKB_REASON_DROPPED))
> + kfree_skb(skb);
> else
> - kfree_skb_reason(skb, reason);
> + consume_skb(skb);
> }
> -EXPORT_SYMBOL(dev_kfree_skb_any_reason);
> +EXPORT_SYMBOL(__dev_kfree_skb_any);
>
>
> /**
> @@ -3312,7 +3294,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
> return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
> }
>
> -void skb_warn_bad_offload(const struct sk_buff *skb)
> +static void skb_warn_bad_offload(const struct sk_buff *skb)
> {
> static const netdev_features_t null_features;
> struct net_device *dev = skb->dev;
> @@ -3422,7 +3404,8 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
> skb->len - start, ~(__u32)0,
> crc32c_csum_stub));
> *(__le32 *)(skb->data + offset) = crc32c_csum;
> - skb_reset_csum_not_inet(skb);
> + skb->ip_summed = CHECKSUM_NONE;
> + skb->csum_not_inet = 0;
> out:
> return ret;
> }
> @@ -3445,6 +3428,74 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
> return vlan_get_protocol_and_depth(skb, type, depth);
> }
>
> +/* openvswitch calls this on rx path, so we need a different check.
> + */
> +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
> +{
> + if (tx_path)
> + return skb->ip_summed != CHECKSUM_PARTIAL &&
> + skb->ip_summed != CHECKSUM_UNNECESSARY;
> +
> + return skb->ip_summed == CHECKSUM_NONE;
> +}
> +
> +/**
> + * __skb_gso_segment - Perform segmentation on skb.
> + * @skb: buffer to segment
> + * @features: features for the output path (see dev->features)
> + * @tx_path: whether it is called in TX path
> + *
> + * This function segments the given skb and returns a list of segments.
> + *
> + * It may return NULL if the skb requires no segmentation. This is
> + * only possible when GSO is used for verifying header integrity.
> + *
> + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
> + */
> +struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
> + netdev_features_t features, bool tx_path)
> +{
> + struct sk_buff *segs;
> +
> + if (unlikely(skb_needs_check(skb, tx_path))) {
> + int err;
> +
> + /* We're going to init ->check field in TCP or UDP header */
> + err = skb_cow_head(skb, 0);
> + if (err < 0)
> + return ERR_PTR(err);
> + }
> +
> + /* Only report GSO partial support if it will enable us to
> + * support segmentation on this frame without needing additional
> + * work.
> + */
> + if (features & NETIF_F_GSO_PARTIAL) {
> + netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
> + struct net_device *dev = skb->dev;
> +
> + partial_features |= dev->features & dev->gso_partial_features;
> + if (!skb_gso_ok(skb, features | partial_features))
> + features &= ~NETIF_F_GSO_PARTIAL;
> + }
> +
> + BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
> + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
> +
> + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
> + SKB_GSO_CB(skb)->encap_level = 0;
> +
> + skb_reset_mac_header(skb);
> + skb_reset_mac_len(skb);
> +
> + segs = skb_mac_gso_segment(skb, features);
> +
> + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
> + skb_warn_bad_offload(skb);
> +
> + return segs;
> +}
> +EXPORT_SYMBOL(__skb_gso_segment);
>
> /* Take action when hardware reception checksum errors are detected. */
> #ifdef CONFIG_BUG
> @@ -3544,7 +3595,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
> if (gso_segs > READ_ONCE(dev->gso_max_segs))
> return features & ~NETIF_F_GSO_MASK;
>
> - if (unlikely(skb->len >= netif_get_gso_max_size(dev, skb)))
> + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size)))
> return features & ~NETIF_F_GSO_MASK;
>
> if (!skb_shinfo(skb)->gso_type) {
> @@ -3793,25 +3844,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
> * we add to pkt_len the headers size of all segments
> */
> if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
> - u16 gso_segs = shinfo->gso_segs;
> unsigned int hdr_len;
> + u16 gso_segs = shinfo->gso_segs;
>
> /* mac layer + network layer */
> - hdr_len = skb_transport_offset(skb);
> + hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
>
> /* + transport layer */
> if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
> const struct tcphdr *th;
> struct tcphdr _tcphdr;
>
> - th = skb_header_pointer(skb, hdr_len,
> + th = skb_header_pointer(skb, skb_transport_offset(skb),
> sizeof(_tcphdr), &_tcphdr);
> if (likely(th))
> hdr_len += __tcp_hdrlen(th);
> } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
> struct udphdr _udphdr;
>
> - if (skb_header_pointer(skb, hdr_len,
> + if (skb_header_pointer(skb, skb_transport_offset(skb),
> sizeof(_udphdr), &_udphdr))
> hdr_len += sizeof(struct udphdr);
> }
> @@ -3987,6 +4038,50 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
> EXPORT_SYMBOL(dev_loopback_xmit);
>
> #ifdef CONFIG_NET_EGRESS
> +static struct sk_buff *
> +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
> + struct tcf_result cl_res;
> +
> + if (!miniq)
> + return skb;
> +
> + /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
> + tc_skb_cb(skb)->mru = 0;
> + tc_skb_cb(skb)->post_ct = false;
> + mini_qdisc_bstats_cpu_update(miniq, skb);
> +
> + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
> + case TC_ACT_OK:
> + case TC_ACT_RECLASSIFY:
> + skb->tc_index = TC_H_MIN(cl_res.classid);
> + break;
> + case TC_ACT_SHOT:
> + mini_qdisc_qstats_cpu_drop(miniq);
> + *ret = NET_XMIT_DROP;
> + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
> + return NULL;
> + case TC_ACT_STOLEN:
> + case TC_ACT_QUEUED:
> + case TC_ACT_TRAP:
> + *ret = NET_XMIT_SUCCESS;
> + consume_skb(skb);
> + return NULL;
> + case TC_ACT_REDIRECT:
> + /* No need to push/pop skb's mac_header here on egress! */
> + skb_do_redirect(skb);
> + *ret = NET_XMIT_SUCCESS;
> + return NULL;
> + default:
> + break;
> + }
> +#endif /* CONFIG_NET_CLS_ACT */
> +
> + return skb;
> +}
> +
> static struct netdev_queue *
> netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
> {
> @@ -4007,182 +4102,6 @@ void netdev_xmit_skip_txqueue(bool skip)
> EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
> #endif /* CONFIG_NET_EGRESS */
>
> -#ifdef CONFIG_NET_XGRESS
> -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
> -{
> - int ret = TC_ACT_UNSPEC;
> -#ifdef CONFIG_NET_CLS_ACT
> - struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
> - struct tcf_result res;
> -
> - if (!miniq)
> - return ret;
> -
> - tc_skb_cb(skb)->mru = 0;
> - tc_skb_cb(skb)->post_ct = false;
> -
> - mini_qdisc_bstats_cpu_update(miniq, skb);
> - ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
> - /* Only tcf related quirks below. */
> - switch (ret) {
> - case TC_ACT_SHOT:
> - mini_qdisc_qstats_cpu_drop(miniq);
> - break;
> - case TC_ACT_OK:
> - case TC_ACT_RECLASSIFY:
> - skb->tc_index = TC_H_MIN(res.classid);
> - break;
> - }
> -#endif /* CONFIG_NET_CLS_ACT */
> - return ret;
> -}
> -
> -static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
> -
> -void tcx_inc(void)
> -{
> - static_branch_inc(&tcx_needed_key);
> -}
> -
> -void tcx_dec(void)
> -{
> - static_branch_dec(&tcx_needed_key);
> -}
> -
> -static __always_inline enum tcx_action_base
> -tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
> - const bool needs_mac)
> -{
> - const struct bpf_mprog_fp *fp;
> - const struct bpf_prog *prog;
> - int ret = TCX_NEXT;
> -
> - if (needs_mac)
> - __skb_push(skb, skb->mac_len);
> - bpf_mprog_foreach_prog(entry, fp, prog) {
> - bpf_compute_data_pointers(skb);
> - ret = bpf_prog_run(prog, skb);
> - if (ret != TCX_NEXT)
> - break;
> - }
> - if (needs_mac)
> - __skb_pull(skb, skb->mac_len);
> - return tcx_action_code(skb, ret);
> -}
> -
> -static __always_inline struct sk_buff *
> -sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> - struct net_device *orig_dev, bool *another)
> -{
> - struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
> - int sch_ret;
> -
> - if (!entry)
> - return skb;
> - if (*pt_prev) {
> - *ret = deliver_skb(skb, *pt_prev, orig_dev);
> - *pt_prev = NULL;
> - }
> -
> - qdisc_skb_cb(skb)->pkt_len = skb->len;
> - tcx_set_ingress(skb, true);
> -
> - if (static_branch_unlikely(&tcx_needed_key)) {
> - sch_ret = tcx_run(entry, skb, true);
> - if (sch_ret != TC_ACT_UNSPEC)
> - goto ingress_verdict;
> - }
> - sch_ret = tc_run(tcx_entry(entry), skb);
> -ingress_verdict:
> - switch (sch_ret) {
> - case TC_ACT_REDIRECT:
> - /* skb_mac_header check was done by BPF, so we can safely
> - * push the L2 header back before redirecting to another
> - * netdev.
> - */
> - __skb_push(skb, skb->mac_len);
> - if (skb_do_redirect(skb) == -EAGAIN) {
> - __skb_pull(skb, skb->mac_len);
> - *another = true;
> - break;
> - }
> - *ret = NET_RX_SUCCESS;
> - return NULL;
> - case TC_ACT_SHOT:
> - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
> - *ret = NET_RX_DROP;
> - return NULL;
> - /* used by tc_run */
> - case TC_ACT_STOLEN:
> - case TC_ACT_QUEUED:
> - case TC_ACT_TRAP:
> - consume_skb(skb);
> - fallthrough;
> - case TC_ACT_CONSUMED:
> - *ret = NET_RX_SUCCESS;
> - return NULL;
> - }
> -
> - return skb;
> -}
> -
> -static __always_inline struct sk_buff *
> -sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> -{
> - struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
> - int sch_ret;
> -
> - if (!entry)
> - return skb;
> -
> - /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
> - * already set by the caller.
> - */
> - if (static_branch_unlikely(&tcx_needed_key)) {
> - sch_ret = tcx_run(entry, skb, false);
> - if (sch_ret != TC_ACT_UNSPEC)
> - goto egress_verdict;
> - }
> - sch_ret = tc_run(tcx_entry(entry), skb);
> -egress_verdict:
> - switch (sch_ret) {
> - case TC_ACT_REDIRECT:
> - /* No need to push/pop skb's mac_header here on egress! */
> - skb_do_redirect(skb);
> - *ret = NET_XMIT_SUCCESS;
> - return NULL;
> - case TC_ACT_SHOT:
> - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
> - *ret = NET_XMIT_DROP;
> - return NULL;
> - /* used by tc_run */
> - case TC_ACT_STOLEN:
> - case TC_ACT_QUEUED:
> - case TC_ACT_TRAP:
> - consume_skb(skb);
> - fallthrough;
> - case TC_ACT_CONSUMED:
> - *ret = NET_XMIT_SUCCESS;
> - return NULL;
> - }
> -
> - return skb;
> -}
> -#else
> -static __always_inline struct sk_buff *
> -sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> - struct net_device *orig_dev, bool *another)
> -{
> - return skb;
> -}
> -
> -static __always_inline struct sk_buff *
> -sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
> -{
> - return skb;
> -}
> -#endif /* CONFIG_NET_XGRESS */
> -
> #ifdef CONFIG_XPS
> static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
> struct xps_dev_maps *dev_maps, unsigned int tci)
> @@ -4365,7 +4284,9 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
> skb_update_prio(skb);
>
> qdisc_pkt_len_init(skb);
> - tcx_set_ingress(skb, false);
> +#ifdef CONFIG_NET_CLS_ACT
> + skb->tc_at_ingress = 0;
> +#endif
> #ifdef CONFIG_NET_EGRESS
> if (static_branch_unlikely(&egress_needed_key)) {
> if (nf_hook_egress_active()) {
> @@ -4552,12 +4473,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
> }
>
> list_add_tail(&napi->poll_list, &sd->poll_list);
> - WRITE_ONCE(napi->list_owner, smp_processor_id());
> - /* If not called from net_rx_action()
> - * we have to raise NET_RX_SOFTIRQ.
> - */
> - if (!sd->in_net_rx_action)
> - raise_softirq_irqoff(NET_RX_SOFTIRQ);
> + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> }
>
> #ifdef CONFIG_RPS
> @@ -4771,17 +4687,21 @@ static void rps_trigger_softirq(void *data)
>
> #endif /* CONFIG_RPS */
>
> +/* Called from hardirq (IPI) context */
> +static void trigger_rx_softirq(void *data)
> +{
> + struct softnet_data *sd = data;
> +
> + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> + smp_store_release(&sd->defer_ipi_scheduled, 0);
> +}
> +
> /*
> - * After we queued a packet into sd->input_pkt_queue,
> - * we need to make sure this queue is serviced soon.
> - *
> - * - If this is another cpu queue, link it to our rps_ipi_list,
> - * and make sure we will process rps_ipi_list from net_rx_action().
> - *
> - * - If this is our own queue, NAPI schedule our backlog.
> - * Note that this also raises NET_RX_SOFTIRQ.
> + * Check if this softnet_data structure is another cpu one
> + * If yes, queue it to our IPI list and return 1
> + * If no, return 0
> */
> -static void napi_schedule_rps(struct softnet_data *sd)
> +static int napi_schedule_rps(struct softnet_data *sd)
> {
> struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
>
> @@ -4790,15 +4710,12 @@ static void napi_schedule_rps(struct softnet_data *sd)
> sd->rps_ipi_next = mysd->rps_ipi_list;
> mysd->rps_ipi_list = sd;
>
> - /* If not called from net_rx_action() or napi_threaded_poll()
> - * we have to raise NET_RX_SOFTIRQ.
> - */
> - if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
> - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> - return;
> + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> + return 1;
> }
> #endif /* CONFIG_RPS */
> __napi_schedule_irqoff(&mysd->backlog);
> + return 0;
> }
>
> #ifdef CONFIG_NET_FLOW_LIMIT
> @@ -5218,17 +5135,16 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
> clist = clist->next;
>
> WARN_ON(refcount_read(&skb->users));
> - if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
> - trace_consume_skb(skb, net_tx_action);
> + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
> + trace_consume_skb(skb);
> else
> trace_kfree_skb(skb, net_tx_action,
> - get_kfree_skb_cb(skb)->reason);
> + SKB_DROP_REASON_NOT_SPECIFIED);
>
> if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
> __kfree_skb(skb);
> else
> - __napi_kfree_skb(skb,
> - get_kfree_skb_cb(skb)->reason);
> + __kfree_skb_defer(skb);
> }
> }
>
> @@ -5290,6 +5206,72 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
> EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
> #endif
>
> +static inline struct sk_buff *
> +sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
> + struct net_device *orig_dev, bool *another)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
> + struct tcf_result cl_res;
> +
> + /* If there's at least one ingress present somewhere (so
> + * we get here via enabled static key), remaining devices
> + * that are not configured with an ingress qdisc will bail
> + * out here.
> + */
> + if (!miniq)
> + return skb;
> +
> + if (*pt_prev) {
> + *ret = deliver_skb(skb, *pt_prev, orig_dev);
> + *pt_prev = NULL;
> + }
> +
> + qdisc_skb_cb(skb)->pkt_len = skb->len;
> + tc_skb_cb(skb)->mru = 0;
> + tc_skb_cb(skb)->post_ct = false;
> + skb->tc_at_ingress = 1;
> + mini_qdisc_bstats_cpu_update(miniq, skb);
> +
> + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
> + case TC_ACT_OK:
> + case TC_ACT_RECLASSIFY:
> + skb->tc_index = TC_H_MIN(cl_res.classid);
> + break;
> + case TC_ACT_SHOT:
> + mini_qdisc_qstats_cpu_drop(miniq);
> + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
> + *ret = NET_RX_DROP;
> + return NULL;
> + case TC_ACT_STOLEN:
> + case TC_ACT_QUEUED:
> + case TC_ACT_TRAP:
> + consume_skb(skb);
> + *ret = NET_RX_SUCCESS;
> + return NULL;
> + case TC_ACT_REDIRECT:
> + /* skb_mac_header check was done by cls/act_bpf, so
> + * we can safely push the L2 header back before
> + * redirecting to another netdev
> + */
> + __skb_push(skb, skb->mac_len);
> + if (skb_do_redirect(skb) == -EAGAIN) {
> + __skb_pull(skb, skb->mac_len);
> + *another = true;
> + break;
> + }
> + *ret = NET_RX_SUCCESS;
> + return NULL;
> + case TC_ACT_CONSUMED:
> + *ret = NET_RX_SUCCESS;
> + return NULL;
> + default:
> + break;
> + }
> +#endif /* CONFIG_NET_CLS_ACT */
> + return skb;
> +}
> +
> /**
> * netdev_is_rx_handler_busy - check if receive handler is registered
> * @dev: device to check
> @@ -6109,9 +6091,10 @@ EXPORT_SYMBOL(__napi_schedule);
> */
> bool napi_schedule_prep(struct napi_struct *n)
> {
> - unsigned long new, val = READ_ONCE(n->state);
> + unsigned long val, new;
>
> do {
> + val = READ_ONCE(n->state);
> if (unlikely(val & NAPIF_STATE_DISABLE))
> return false;
> new = val | NAPIF_STATE_SCHED;
> @@ -6124,7 +6107,7 @@ bool napi_schedule_prep(struct napi_struct *n)
> */
> new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
> NAPIF_STATE_MISSED;
> - } while (!try_cmpxchg(&n->state, &val, new));
> + } while (cmpxchg(&n->state, val, new) != val);
>
> return !(val & NAPIF_STATE_SCHED);
> }
> @@ -6191,10 +6174,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
> list_del_init(&n->poll_list);
> local_irq_restore(flags);
> }
> - WRITE_ONCE(n->list_owner, -1);
>
> - val = READ_ONCE(n->state);
> do {
> + val = READ_ONCE(n->state);
> +
> WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
>
> new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
> @@ -6207,7 +6190,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
> */
> new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
> NAPIF_STATE_SCHED;
> - } while (!try_cmpxchg(&n->state, &val, new));
> + } while (cmpxchg(&n->state, val, new) != val);
>
> if (unlikely(val & NAPIF_STATE_MISSED)) {
> __napi_schedule(n);
> @@ -6318,8 +6301,7 @@ void napi_busy_loop(unsigned int napi_id,
> if (!napi)
> goto out;
>
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> - preempt_disable();
> + preempt_disable();
> for (;;) {
> int work = 0;
>
> @@ -6361,8 +6343,7 @@ void napi_busy_loop(unsigned int napi_id,
> if (unlikely(need_resched())) {
> if (napi_poll)
> busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> - preempt_enable();
> + preempt_enable();
> rcu_read_unlock();
> cond_resched();
> if (loop_end(loop_end_arg, start_time))
> @@ -6373,8 +6354,7 @@ void napi_busy_loop(unsigned int napi_id,
> }
> if (napi_poll)
> busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> - preempt_enable();
> + preempt_enable();
> out:
> rcu_read_unlock();
> }
> @@ -6476,8 +6456,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
> * softirq mode will happen in the next round of napi_schedule().
> * This should not cause hiccups/stalls to the live traffic.
> */
> - list_for_each_entry(napi, &dev->napi_list, dev_list)
> - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
> + list_for_each_entry(napi, &dev->napi_list, dev_list) {
> + if (threaded)
> + set_bit(NAPI_STATE_THREADED, &napi->state);
> + else
> + clear_bit(NAPI_STATE_THREADED, &napi->state);
> + }
>
> return err;
> }
> @@ -6506,7 +6490,6 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
> #ifdef CONFIG_NETPOLL
> napi->poll_owner = -1;
> #endif
> - napi->list_owner = -1;
> set_bit(NAPI_STATE_SCHED, &napi->state);
> set_bit(NAPI_STATE_NPSVC, &napi->state);
> list_add_rcu(&napi->dev_list, &dev->napi_list);
> @@ -6528,16 +6511,19 @@ void napi_disable(struct napi_struct *n)
> might_sleep();
> set_bit(NAPI_STATE_DISABLE, &n->state);
>
> - val = READ_ONCE(n->state);
> - do {
> - while (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
> + for ( ; ; ) {
> + val = READ_ONCE(n->state);
> + if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
> usleep_range(20, 200);
> - val = READ_ONCE(n->state);
> + continue;
> }
>
> new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
> new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
> - } while (!try_cmpxchg(&n->state, &val, new));
> +
> + if (cmpxchg(&n->state, val, new) == val)
> + break;
> + }
>
> hrtimer_cancel(&n->timer);
>
> @@ -6554,15 +6540,16 @@ EXPORT_SYMBOL(napi_disable);
> */
> void napi_enable(struct napi_struct *n)
> {
> - unsigned long new, val = READ_ONCE(n->state);
> + unsigned long val, new;
>
> do {
> + val = READ_ONCE(n->state);
> BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
>
> new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
> if (n->dev->threaded && n->thread)
> new |= NAPIF_STATE_THREADED;
> - } while (!try_cmpxchg(&n->state, &val, new));
> + } while (cmpxchg(&n->state, val, new) != val);
> }
> EXPORT_SYMBOL(napi_enable);
>
> @@ -6718,57 +6705,9 @@ static int napi_thread_wait(struct napi_struct *napi)
> return -1;
> }
>
> -static void skb_defer_free_flush(struct softnet_data *sd)
> -{
> - struct sk_buff *skb, *next;
> -
> - /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
> - if (!READ_ONCE(sd->defer_list))
> - return;
> -
> - spin_lock(&sd->defer_lock);
> - skb = sd->defer_list;
> - sd->defer_list = NULL;
> - sd->defer_count = 0;
> - spin_unlock(&sd->defer_lock);
> -
> - while (skb != NULL) {
> - next = skb->next;
> - napi_consume_skb(skb, 1);
> - skb = next;
> - }
> -}
> -
> -#ifndef CONFIG_PREEMPT_RT
> -
> -/* Called from hardirq (IPI) context */
> -static void trigger_rx_softirq(void *data)
> -{
> - struct softnet_data *sd = data;
> -
> - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> - smp_store_release(&sd->defer_ipi_scheduled, 0);
> -}
> -
> -#else
> -
> -static void trigger_rx_softirq(struct work_struct *defer_work)
> -{
> - struct softnet_data *sd;
> -
> - sd = container_of(defer_work, struct softnet_data, defer_work);
> - smp_store_release(&sd->defer_ipi_scheduled, 0);
> - local_bh_disable();
> - skb_defer_free_flush(sd);
> - local_bh_enable();
> -}
> -
> -#endif
> -
> static int napi_threaded_poll(void *data)
> {
> struct napi_struct *napi = data;
> - struct softnet_data *sd;
> void *have;
>
> while (!napi_thread_wait(napi)) {
> @@ -6778,21 +6717,11 @@ static int napi_threaded_poll(void *data)
> bool repoll = false;
>
> local_bh_disable();
> - sd = this_cpu_ptr(&softnet_data);
> - sd->in_napi_threaded_poll = true;
>
> have = netpoll_poll_lock(napi);
> __napi_poll(napi, &repoll);
> netpoll_poll_unlock(have);
>
> - sd->in_napi_threaded_poll = false;
> - barrier();
> -
> - if (sd_has_rps_ipi_waiting(sd)) {
> - local_irq_disable();
> - net_rps_action_and_irq_enable(sd);
> - }
> - skb_defer_free_flush(sd);
> local_bh_enable();
>
> if (!repoll)
> @@ -6805,6 +6734,28 @@ static int napi_threaded_poll(void *data)
> return 0;
> }
>
> +static void skb_defer_free_flush(struct softnet_data *sd)
> +{
> + struct sk_buff *skb, *next;
> + unsigned long flags;
> +
> + /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
> + if (!READ_ONCE(sd->defer_list))
> + return;
> +
> + spin_lock_irqsave(&sd->defer_lock, flags);
> + skb = sd->defer_list;
> + sd->defer_list = NULL;
> + sd->defer_count = 0;
> + spin_unlock_irqrestore(&sd->defer_lock, flags);
> +
> + while (skb != NULL) {
> + next = skb->next;
> + napi_consume_skb(skb, 1);
> + skb = next;
> + }
> +}
> +
> static __latent_entropy void net_rx_action(struct softirq_action *h)
> {
> struct softnet_data *sd = this_cpu_ptr(&softnet_data);
> @@ -6814,8 +6765,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> LIST_HEAD(list);
> LIST_HEAD(repoll);
>
> -start:
> - sd->in_net_rx_action = true;
> local_irq_disable();
> list_splice_init(&sd->poll_list, &list);
> local_irq_enable();
> @@ -6826,18 +6775,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> skb_defer_free_flush(sd);
>
> if (list_empty(&list)) {
> - if (list_empty(&repoll)) {
> - sd->in_net_rx_action = false;
> - barrier();
> - /* We need to check if ____napi_schedule()
> - * had refilled poll_list while
> - * sd->in_net_rx_action was true.
> - */
> - if (!list_empty(&sd->poll_list))
> - goto start;
> - if (!sd_has_rps_ipi_waiting(sd))
> - goto end;
> - }
> + if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
> + goto end;
> break;
> }
>
> @@ -6862,8 +6801,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
> list_splice(&list, &sd->poll_list);
> if (!list_empty(&sd->poll_list))
> __raise_softirq_irqoff(NET_RX_SOFTIRQ);
> - else
> - sd->in_net_rx_action = false;
>
> net_rps_action_and_irq_enable(sd);
> end:;
> @@ -8503,8 +8440,9 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
> }
> }
> if (dev->flags != old_flags) {
> - netdev_info(dev, "%s promiscuous mode\n",
> - dev->flags & IFF_PROMISC ? "entered" : "left");
> + pr_info("device %s %s promiscuous mode\n",
> + dev->name,
> + dev->flags & IFF_PROMISC ? "entered" : "left");
> if (audit_enabled) {
> current_uid_gid(&uid, &gid);
> audit_log(audit_context(), GFP_ATOMIC,
> @@ -8521,7 +8459,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
> dev_change_rx_flags(dev, IFF_PROMISC);
> }
> if (notify)
> - __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
> + __dev_notify_flags(dev, old_flags, IFF_PROMISC);
> return 0;
> }
>
> @@ -8572,13 +8510,11 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
> }
> }
> if (dev->flags ^ old_flags) {
> - netdev_info(dev, "%s allmulticast mode\n",
> - dev->flags & IFF_ALLMULTI ? "entered" : "left");
> dev_change_rx_flags(dev, IFF_ALLMULTI);
> dev_set_rx_mode(dev);
> if (notify)
> __dev_notify_flags(dev, old_flags,
> - dev->gflags ^ old_gflags, 0, NULL);
> + dev->gflags ^ old_gflags);
> }
> return 0;
> }
> @@ -8741,13 +8677,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags,
> }
>
> void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
> - unsigned int gchanges, u32 portid,
> - const struct nlmsghdr *nlh)
> + unsigned int gchanges)
> {
> unsigned int changes = dev->flags ^ old_flags;
>
> if (gchanges)
> - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC, portid, nlh);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
>
> if (changes & IFF_UP) {
> if (dev->flags & IFF_UP)
> @@ -8789,7 +8724,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
> return ret;
>
> changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
> - __dev_notify_flags(dev, old_flags, changes, 0, NULL);
> + __dev_notify_flags(dev, old_flags, changes);
> return ret;
> }
> EXPORT_SYMBOL(dev_change_flags);
> @@ -8969,11 +8904,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
> err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
> if (err)
> return err;
> - if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) {
> - err = ops->ndo_set_mac_address(dev, sa);
> - if (err)
> - return err;
> - }
> + err = ops->ndo_set_mac_address(dev, sa);
> + if (err)
> + return err;
> dev->addr_assign_type = NET_ADDR_SET;
> call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
> add_device_randomness(dev->dev_addr, dev->addr_len);
> @@ -9411,16 +9344,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
> NL_SET_ERR_MSG(extack, "Native and generic XDP can't be
> active at the same time");
> return -EEXIST;
> }
> - if (!offload && bpf_prog_is_offloaded(new_prog->aux)) {
> - NL_SET_ERR_MSG(extack, "Using offloaded program without
> HW_MODE flag is not supported");
> - return -EINVAL;
> - }
> - if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
> - NL_SET_ERR_MSG(extack, "Program bound to different
> device");
> - return -EINVAL;
> - }
> - if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) {
> - NL_SET_ERR_MSG(extack, "Can't attach device-bound
> programs in generic mode");
> + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
> + NL_SET_ERR_MSG(extack, "Using device-bound program
> without HW_MODE flag is not supported");
> return -EINVAL;
> }
> if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
> @@ -9602,7 +9527,6 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> {
> struct net *net = current->nsproxy->net_ns;
> struct bpf_link_primer link_primer;
> - struct netlink_ext_ack extack = {};
> struct bpf_xdp_link *link;
> struct net_device *dev;
> int err, fd;
> @@ -9630,13 +9554,12 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> goto unlock;
> }
>
> - err = dev_xdp_attach_link(dev, &extack, link);
> + err = dev_xdp_attach_link(dev, NULL, link);
> rtnl_unlock();
>
> if (err) {
> link->dev = NULL;
> bpf_link_cleanup(&link_primer);
> - trace_bpf_xdp_link_attach_failed(extack._msg);
> goto out_put_dev;
> }
>
> @@ -9700,40 +9623,23 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
> }
>
> /**
> - * dev_index_reserve() - allocate an ifindex in a namespace
> - * @net: the applicable net namespace
> - * @ifindex: requested ifindex, pass %0 to get one allocated
> + * dev_new_index - allocate an ifindex
> + * @net: the applicable net namespace
> *
> - * Allocate a ifindex for a new device. Caller must either use the ifindex
> - * to store the device (via list_netdevice()) or call dev_index_release()
> - * to give the index up.
> - *
> - * Return: a suitable unique value for a new device interface number or -errno.
> + * Returns a suitable unique value for a new device interface
> + * number. The caller must hold the rtnl semaphore or the
> + * dev_base_lock to be sure it remains unique.
> */
> -static int dev_index_reserve(struct net *net, u32 ifindex)
> +static int dev_new_index(struct net *net)
> {
> - int err;
> + int ifindex = net->ifindex;
>
> - if (ifindex > INT_MAX) {
> - DEBUG_NET_WARN_ON_ONCE(1);
> - return -EINVAL;
> + for (;;) {
> + if (++ifindex <= 0)
> + ifindex = 1;
> + if (!__dev_get_by_index(net, ifindex))
> + return net->ifindex = ifindex;
> }
> -
> - if (!ifindex)
> - err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
> - xa_limit_31b, &net->ifindex, GFP_KERNEL);
> - else
> - err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
> - if (err < 0)
> - return err;
> -
> - return ifindex;
> -}
> -
> -static void dev_index_release(struct net *net, int ifindex)
> -{
> - /* Expect only unused indexes, unlist_netdevice() removes the used */
> - WARN_ON(xa_erase(&net->dev_by_index, ifindex));
> }
>
> /* Delayed registration/unregisteration */
> @@ -10255,10 +10161,11 @@ int register_netdevice(struct net_device *dev)
> if (ret)
> goto err_uninit;
>
> - ret = dev_index_reserve(net, dev->ifindex);
> - if (ret < 0)
> + ret = -EBUSY;
> + if (!dev->ifindex)
> + dev->ifindex = dev_new_index(net);
> + else if (__dev_get_by_index(net, dev->ifindex))
> goto err_free_pcpu;
> - dev->ifindex = ret;
>
> /* Transfer changeable features to wanted_features and enable
> * software offloads (GSO and GRO).
> @@ -10305,14 +10212,14 @@ int register_netdevice(struct net_device *dev)
> ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
> ret = notifier_to_errno(ret);
> if (ret)
> - goto err_ifindex_release;
> + goto err_free_pcpu;
>
> ret = netdev_register_kobject(dev);
> write_lock(&dev_base_lock);
> dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
> write_unlock(&dev_base_lock);
> if (ret)
> - goto err_uninit_notify;
> + goto err_free_pcpu;
>
> __netdev_update_features(dev);
>
> @@ -10354,15 +10261,11 @@ int register_netdevice(struct net_device *dev)
> */
> if (!dev->rtnl_link_ops ||
> dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
> - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
>
> out:
> return ret;
>
> -err_uninit_notify:
> - call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
> -err_ifindex_release:
> - dev_index_release(net, dev->ifindex);
> err_free_pcpu:
> netdev_do_free_pcpu_stats(dev);
> err_uninit:
> @@ -10730,12 +10633,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
>
> stats = per_cpu_ptr(netstats, cpu);
> do {
> - start = u64_stats_fetch_begin(&stats->syncp);
> + start = u64_stats_fetch_begin_irq(&stats->syncp);
> rx_packets = u64_stats_read(&stats->rx_packets);
> rx_bytes = u64_stats_read(&stats->rx_bytes);
> tx_packets = u64_stats_read(&stats->tx_packets);
> tx_bytes = u64_stats_read(&stats->tx_bytes);
> - } while (u64_stats_fetch_retry(&stats->syncp, start));
> + } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
>
> s->rx_packets += rx_packets;
> s->rx_bytes += rx_bytes;
> @@ -10788,24 +10691,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
> }
> EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
>
> -/**
> - * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
> - * @dev: netdev to enable the IRQ coalescing on
> - *
> - * Sets a conservative default for SW IRQ coalescing. Users can use
> - * sysfs attributes to override the default values.
> - */
> -void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
> -{
> - WARN_ON(dev->reg_state == NETREG_REGISTERED);
> -
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
> - dev->gro_flush_timeout = 20000;
> - dev->napi_defer_hard_irqs = 1;
> - }
> -}
> -EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
> -
> void netdev_freemem(struct net_device *dev)
> {
> char *addr = (char *)dev - dev->padded;
> @@ -10863,7 +10748,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> dev = PTR_ALIGN(p, NETDEV_ALIGN);
> dev->padded = (char *)dev - (char *)p;
>
> - ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
> + ref_tracker_dir_init(&dev->refcnt_tracker, 128);
> #ifdef CONFIG_PCPU_DEV_REFCNT
> dev->pcpu_refcnt = alloc_percpu(int);
> if (!dev->pcpu_refcnt)
> @@ -10882,11 +10767,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> dev_net_set(dev, &init_net);
>
> dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
> - dev->xdp_zc_max_segs = 1;
> dev->gso_max_segs = GSO_MAX_SEGS;
> dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
> - dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
> - dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
> dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
> dev->tso_max_segs = TSO_MAX_SEGS;
> dev->upper_level = 1;
> @@ -11054,8 +10936,14 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
> }
> EXPORT_SYMBOL(unregister_netdevice_queue);
>
> -void unregister_netdevice_many_notify(struct list_head *head,
> - u32 portid, const struct nlmsghdr *nlh)
> +/**
> + * unregister_netdevice_many - unregister many devices
> + * @head: list of devices
> + *
> + * Note: As most callers use a stack allocated list_head,
> + * we force a list_del() to make sure stack wont be corrupted later.
> + */
> +void unregister_netdevice_many(struct list_head *head)
> {
> struct net_device *dev, *tmp;
> LIST_HEAD(close_head);
> @@ -11104,9 +10992,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
>
> /* Shutdown queueing discipline. */
> dev_shutdown(dev);
> - dev_tcx_uninstall(dev);
> +
> dev_xdp_uninstall(dev);
> - bpf_dev_bound_netdev_unregister(dev);
>
> netdev_offload_xstats_disable_all(dev);
>
> @@ -11118,8 +11005,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
> if (!dev->rtnl_link_ops ||
> dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
> skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
> - GFP_KERNEL, NULL, 0,
> - portid, nlh);
> + GFP_KERNEL, NULL, 0);
>
> /*
> * Flush the unicast and multicast chains
> @@ -11130,13 +11016,11 @@ void unregister_netdevice_many_notify(struct list_head *head,
> netdev_name_node_alt_flush(dev);
> netdev_name_node_free(dev->name_node);
>
> - call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
> -
> if (dev->netdev_ops->ndo_uninit)
> dev->netdev_ops->ndo_uninit(dev);
>
> if (skb)
> - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
> + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
>
> /* Notifier chain MUST detach us all upper devices. */
> WARN_ON(netdev_has_any_upper_dev(dev));
> @@ -11159,18 +11043,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
>
> list_del(head);
> }
> -
> -/**
> - * unregister_netdevice_many - unregister many devices
> - * @head: list of devices
> - *
> - * Note: As most callers use a stack allocated list_head,
> - * we force a list_del() to make sure stack wont be corrupted later.
> - */
> -void unregister_netdevice_many(struct list_head *head)
> -{
> - unregister_netdevice_many_notify(head, 0, NULL);
> -}
> EXPORT_SYMBOL(unregister_netdevice_many);
>
> /**
> @@ -11251,19 +11123,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
> goto out;
>
> /* Check that new_ifindex isn't used yet. */
> - if (new_ifindex) {
> - err = dev_index_reserve(net, new_ifindex);
> - if (err < 0)
> - goto out;
> - } else {
> - /* If there is an ifindex conflict assign a new one */
> - err = dev_index_reserve(net, dev->ifindex);
> - if (err == -EBUSY)
> - err = dev_index_reserve(net, 0);
> - if (err < 0)
> - goto out;
> - new_ifindex = err;
> - }
> + err = -EBUSY;
> + if (new_ifindex && __dev_get_by_index(net, new_ifindex))
> + goto out;
>
> /*
> * And now a mini version of register_netdevice unregister_netdevice.
> @@ -11291,6 +11153,13 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
> rcu_barrier();
>
> new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
> + /* If there is an ifindex conflict assign a new one */
> + if (!new_ifindex) {
> + if (__dev_get_by_index(net, dev->ifindex))
> + new_ifindex = dev_new_index(net);
> + else
> + new_ifindex = dev->ifindex;
> + }
>
> rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
> new_ifindex);
> @@ -11339,7 +11208,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
> * Prevent userspace races by waiting until the network
> * device is fully setup before sending notifications.
> */
> - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
>
> synchronize_net();
> err = 0;
> @@ -11471,8 +11340,6 @@ static int __net_init netdev_init(struct net *net)
> if (net->dev_index_head == NULL)
> goto err_idx;
>
> - xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
> -
> RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
>
> return 0;
> @@ -11570,7 +11437,6 @@ static void __net_exit netdev_exit(struct net *net)
> {
> kfree(net->dev_name_head);
> kfree(net->dev_index_head);
> - xa_destroy(&net->dev_by_index);
> if (net != &init_net)
> WARN_ON_ONCE(!list_empty(&net->dev_base_head));
> }
> @@ -11706,11 +11572,7 @@ static int __init net_dev_init(void)
> INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
> sd->cpu = i;
> #endif
> -#ifndef CONFIG_PREEMPT_RT
> INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
> -#else
> - INIT_WORK(&sd->defer_work, trigger_rx_softirq);
> -#endif
> spin_lock_init(&sd->defer_lock);
>
> init_gro_hash(&sd->backlog);
>
> ---
> base-commit: 043f841768688ddc914578849f2b9cc11757a816
> change-id: 20260210-v6-1-octeon-3e6680539e84
>
> Best regards,
> --
> Kevin Hao <[email protected]>
>