commit:     65afc4ed817c7aadd9d005e195dca366d4fbd671
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Feb  5 12:13:03 2022 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Feb  5 12:13:03 2022 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=65afc4ed

Linux patch 5.10.97

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README              |   4 +
 1096_linux-5.10.97.patch | 938 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 942 insertions(+)

diff --git a/0000_README b/0000_README
index cc530626..20548375 100644
--- a/0000_README
+++ b/0000_README
@@ -427,6 +427,10 @@ Patch:  1095_linux-5.10.96.patch
 From:   http://www.kernel.org
 Desc:   Linux 5.10.96
 
+Patch:  1096_linux-5.10.97.patch
+From:   http://www.kernel.org
+Desc:   Linux 5.10.97
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1096_linux-5.10.97.patch b/1096_linux-5.10.97.patch
new file mode 100644
index 00000000..a02dced7
--- /dev/null
+++ b/1096_linux-5.10.97.patch
@@ -0,0 +1,938 @@
+diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
+index f2b3439edcc2c..860fe651d6453 100644
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
+ for the same psi metric can be specified. However for each trigger a separate
+ file descriptor is required to be able to poll it separately from others,
+ therefore for each trigger a separate open() syscall should be made even
+-when opening the same psi interface file.
++when opening the same psi interface file. Write operations to a file descriptor
++with an already existing psi trigger will fail with EBUSY.
+ 
+ Monitors activate only when system enters stall state for the monitored
+ psi metric and deactivates upon exit from the stall state. While system is
+diff --git a/Makefile b/Makefile
+index c43133c8a5b1f..9f328bfcaf97d 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 10
+-SUBLEVEL = 96
++SUBLEVEL = 97
+ EXTRAVERSION =
+ NAME = Dare mighty things
+ 
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 13e10b970ac83..0eb41dce55da3 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1285,6 +1285,7 @@ struct kvm_x86_ops {
+ };
+ 
+ struct kvm_x86_nested_ops {
++      void (*leave_nested)(struct kvm_vcpu *vcpu);
+       int (*check_events)(struct kvm_vcpu *vcpu);
+       bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+       int (*get_state)(struct kvm_vcpu *vcpu,
+diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
+index 2577d78757810..886d4648c9dd4 100644
+--- a/arch/x86/kernel/cpu/mce/intel.c
++++ b/arch/x86/kernel/cpu/mce/intel.c
+@@ -486,6 +486,8 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
+       case INTEL_FAM6_BROADWELL_X:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_ICELAKE_X:
++      case INTEL_FAM6_ICELAKE_D:
++      case INTEL_FAM6_SAPPHIRERAPIDS_X:
+       case INTEL_FAM6_XEON_PHI_KNL:
+       case INTEL_FAM6_XEON_PHI_KNM:
+ 
+diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
+index f0946872f5e6d..23910e6a3f011 100644
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -783,8 +783,10 @@ void svm_free_nested(struct vcpu_svm *svm)
+ /*
+  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+  */
+-void svm_leave_nested(struct vcpu_svm *svm)
++void svm_leave_nested(struct kvm_vcpu *vcpu)
+ {
++      struct vcpu_svm *svm = to_svm(vcpu);
++
+       if (is_guest_mode(&svm->vcpu)) {
+               struct vmcb *hsave = svm->nested.hsave;
+               struct vmcb *vmcb = svm->vmcb;
+@@ -1185,7 +1187,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+               return -EINVAL;
+ 
+       if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+-              svm_leave_nested(svm);
++              svm_leave_nested(vcpu);
+               svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+               return 0;
+       }
+@@ -1238,6 +1240,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+       copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
+       hsave->save = *save;
+ 
++      if (is_guest_mode(vcpu))
++              svm_leave_nested(vcpu);
++
+       svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
+       load_nested_vmcb_control(svm, ctl);
+       nested_prepare_vmcb_control(svm);
+@@ -1252,6 +1257,7 @@ out_free:
+ }
+ 
+ struct kvm_x86_nested_ops svm_nested_ops = {
++      .leave_nested = svm_leave_nested,
+       .check_events = svm_check_nested_events,
+       .get_nested_state_pages = svm_get_nested_state_pages,
+       .get_state = svm_get_nested_state,
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 2e6332af98aba..fa543c355fbdb 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ 
+       if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+               if (!(efer & EFER_SVME)) {
+-                      svm_leave_nested(svm);
++                      svm_leave_nested(vcpu);
+                       svm_set_gif(svm, true);
+ 
+                       /*
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index be74e22b82ea7..2c007241fbf53 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -393,7 +393,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
+ 
+ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+                        struct vmcb *nested_vmcb);
+-void svm_leave_nested(struct vcpu_svm *svm);
++void svm_leave_nested(struct kvm_vcpu *vcpu);
+ void svm_free_nested(struct vcpu_svm *svm);
+ int svm_allocate_nested(struct vcpu_svm *svm);
+ int nested_svm_vmrun(struct vcpu_svm *svm);
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 36661b15c3d04..0c2389d0fdafe 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -6628,6 +6628,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
+ }
+ 
+ struct kvm_x86_nested_ops vmx_nested_ops = {
++      .leave_nested = vmx_leave_nested,
+       .check_events = vmx_check_nested_events,
+       .hv_timer_pending = nested_vmx_preemption_timer_pending,
+       .get_state = vmx_get_nested_state,
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 7871b8e84b368..a5d6d79b023bc 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4391,6 +4391,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
+                               vcpu->arch.hflags |= HF_SMM_MASK;
+                       else
+                               vcpu->arch.hflags &= ~HF_SMM_MASK;
++
++                      kvm_x86_ops.nested_ops->leave_nested(vcpu);
+                       kvm_smm_changed(vcpu);
+               }
+ 
+diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c
+index 244b8f3b38b40..c5eb46cbf388b 100644
+--- a/drivers/bus/simple-pm-bus.c
++++ b/drivers/bus/simple-pm-bus.c
+@@ -16,33 +16,7 @@
+ 
+ static int simple_pm_bus_probe(struct platform_device *pdev)
+ {
+-      const struct device *dev = &pdev->dev;
+-      struct device_node *np = dev->of_node;
+-      const struct of_device_id *match;
+-
+-      /*
+-       * Allow user to use driver_override to bind this driver to a
+-       * transparent bus device which has a different compatible string
+-       * that's not listed in simple_pm_bus_of_match. We don't want to do any
+-       * of the simple-pm-bus tasks for these devices, so return early.
+-       */
+-      if (pdev->driver_override)
+-              return 0;
+-
+-      match = of_match_device(dev->driver->of_match_table, dev);
+-      /*
+-       * These are transparent bus devices (not simple-pm-bus matches) that
+-       * have their child nodes populated automatically.  So, don't need to
+-       * do anything more. We only match with the device if this driver is
+-       * the most specific match because we don't want to incorrectly bind to
+-       * a device that has a more specific driver.
+-       */
+-      if (match && match->data) {
+-              if (of_property_match_string(np, "compatible", match->compatible) == 0)
+-                      return 0;
+-              else
+-                      return -ENODEV;
+-      }
++      struct device_node *np = pdev->dev.of_node;
+ 
+       dev_dbg(&pdev->dev, "%s\n", __func__);
+ 
+@@ -56,25 +30,14 @@ static int simple_pm_bus_probe(struct platform_device *pdev)
+ 
+ static int simple_pm_bus_remove(struct platform_device *pdev)
+ {
+-      const void *data = of_device_get_match_data(&pdev->dev);
+-
+-      if (pdev->driver_override || data)
+-              return 0;
+-
+       dev_dbg(&pdev->dev, "%s\n", __func__);
+ 
+       pm_runtime_disable(&pdev->dev);
+       return 0;
+ }
+ 
+-#define ONLY_BUS      ((void *) 1) /* Match if the device is only a bus. */
+-
+ static const struct of_device_id simple_pm_bus_of_match[] = {
+       { .compatible = "simple-pm-bus", },
+-      { .compatible = "simple-bus",   .data = ONLY_BUS },
+-      { .compatible = "simple-mfd",   .data = ONLY_BUS },
+-      { .compatible = "isa",          .data = ONLY_BUS },
+-      { .compatible = "arm,amba-bus", .data = ONLY_BUS },
+       { /* sentinel */ }
+ };
+ MODULE_DEVICE_TABLE(of, simple_pm_bus_of_match);
+diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
+index 9392de2679a1d..8eac7dc637b0f 100644
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -1402,18 +1402,18 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
+       u32 val;
+       int ret;
+ 
+-      ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
+-      if (ret)
+-              return ret;
++      if (enable) {
++              ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
++              if (ret)
++                      return ret;
+ 
+-      val = HDMI_READ(HDMI_CEC_CNTRL_5);
+-      val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
+-               VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
+-               VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
+-      val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
+-             ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
++              val = HDMI_READ(HDMI_CEC_CNTRL_5);
++              val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
++                       VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
++                       VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
++              val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
++                      ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
+ 
+-      if (enable) {
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val |
+                          VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val);
+@@ -1439,7 +1439,10 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
+               HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, VC4_HDMI_CPU_CEC);
+               HDMI_WRITE(HDMI_CEC_CNTRL_5, val |
+                          VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
++
++              pm_runtime_put(&vc4_hdmi->pdev->dev);
+       }
++
+       return 0;
+ }
+ 
+@@ -1531,8 +1534,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
+       if (ret < 0)
+               goto err_delete_cec_adap;
+ 
+-      pm_runtime_put(&vc4_hdmi->pdev->dev);
+-
+       return 0;
+ 
+ err_delete_cec_adap:
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+index 395eb0b526802..a816b30bca04c 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
+               if (!channel->tx_ring)
+                       break;
+ 
++              /* Deactivate the Tx timer */
+               del_timer_sync(&channel->tx_timer);
++              channel->tx_timer_active = 0;
+       }
+ }
+ 
+@@ -2557,6 +2559,14 @@ read_again:
+                       buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+                       len += buf2_len;
+ 
++                      if (buf2_len > rdata->rx.buf.dma_len) {
++                              /* Hardware inconsistency within the descriptors
++                               * that has resulted in a length underflow.
++                               */
++                              error = 1;
++                              goto skip_data;
++                      }
++
+                       if (!skb) {
+                               skb = xgbe_create_skb(pdata, napi, rdata,
+                                                     buf1_len);
+@@ -2586,8 +2596,10 @@ skip_data:
+               if (!last || context_next)
+                       goto read_again;
+ 
+-              if (!skb)
++              if (!skb || error) {
++                      dev_kfree_skb(skb);
+                       goto next_packet;
++              }
+ 
+               /* Be sure we don't exceed the configured MTU */
+               max_len = netdev->mtu + ETH_HLEN;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+index 9c076aa20306a..b6f5c1bcdbcd4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ 
+ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+ {
+-      struct mlx5e_rep_priv *rpriv;
+-      struct mlx5e_priv *priv;
+-
+-      /* A given netdev is not a representor or not a slave of LAG configuration */
+-      if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
+-              return false;
+-
+-      priv = netdev_priv(netdev);
+-      rpriv = priv->ppriv;
+-
+-      /* Egress acl forward to vport is supported only non-uplink representor */
+-      return rpriv->rep->vport != MLX5_VPORT_UPLINK;
++      return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+ }
+ 
+ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
+       u16 fwd_vport_num;
+       int err;
+ 
+-      if (!mlx5e_rep_is_lag_netdev(netdev))
+-              return;
+-
+       info = ptr;
+       lag_info = info->lower_state_info;
+       /* This is not an event of a representor becoming active slave */
+@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+       struct net_device *lag_dev;
+       struct mlx5e_priv *priv;
+ 
+-      if (!mlx5e_rep_is_lag_netdev(netdev))
+-              return;
+-
+       priv = netdev_priv(netdev);
+       rpriv = priv->ppriv;
+       lag_dev = info->upper_dev;
+@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+                                      unsigned long event, void *ptr)
+ {
+       struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++      struct mlx5e_rep_priv *rpriv;
++      struct mlx5e_rep_bond *bond;
++      struct mlx5e_priv *priv;
++
++      if (!mlx5e_rep_is_lag_netdev(netdev))
++              return NOTIFY_DONE;
++
++      bond = container_of(nb, struct mlx5e_rep_bond, nb);
++      priv = netdev_priv(netdev);
++      rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
++      /* Verify VF representor is on the same device of the bond handling the netevent. */
++      if (rpriv->uplink_priv.bond != bond)
++              return NOTIFY_DONE;
+ 
+       switch (event) {
+       case NETDEV_CHANGELOWERSTATE:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+index ee710ce007950..9b472e793ee36 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -131,7 +131,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ 
+-      del_timer(&fw_reset->timer);
++      del_timer_sync(&fw_reset->timer);
+ }
+ 
+ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+index 947f346bdc2d6..77c6287c90d55 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -292,7 +292,7 @@ static int
+ create_chain_restore(struct fs_chain *chain)
+ {
+       struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+-      char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
++      u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+       struct mlx5_fs_chains *chains = chain->chains;
+       enum mlx5e_tc_attr_to_reg chain_to_reg;
+       struct mlx5_modify_hdr *mod_hdr;
+diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
+index a37aae00e128f..621648ce750b7 100644
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -901,27 +901,35 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, u32 count)
+       struct gsi *gsi;
+       u32 backlog;
+ 
+-      if (!endpoint->replenish_enabled) {
++      if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
+               if (count)
+                       atomic_add(count, &endpoint->replenish_saved);
+               return;
+       }
+ 
++      /* If already active, just update the backlog */
++      if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
++              if (count)
++                      atomic_add(count, &endpoint->replenish_backlog);
++              return;
++      }
+ 
+       while (atomic_dec_not_zero(&endpoint->replenish_backlog))
+               if (ipa_endpoint_replenish_one(endpoint))
+                       goto try_again_later;
++
++      clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
++
+       if (count)
+               atomic_add(count, &endpoint->replenish_backlog);
+ 
+       return;
+ 
+ try_again_later:
+-      /* The last one didn't succeed, so fix the backlog */
+-      backlog = atomic_inc_return(&endpoint->replenish_backlog);
++      clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+ 
+-      if (count)
+-              atomic_add(count, &endpoint->replenish_backlog);
++      /* The last one didn't succeed, so fix the backlog */
++      backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog);
+ 
+       /* Whenever a receive buffer transaction completes we'll try to
+        * replenish again.  It's unlikely, but if we fail to supply even
+@@ -941,7 +949,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
+       u32 max_backlog;
+       u32 saved;
+ 
+-      endpoint->replenish_enabled = true;
++      set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+       while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
+               atomic_add(saved, &endpoint->replenish_backlog);
+ 
+@@ -955,7 +963,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
+ {
+       u32 backlog;
+ 
+-      endpoint->replenish_enabled = false;
++      clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+       while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
+               atomic_add(backlog, &endpoint->replenish_saved);
+ }
+@@ -1472,7 +1480,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
+               /* RX transactions require a single TRE, so the maximum
+                * backlog is the same as the maximum outstanding TREs.
+                */
+-              endpoint->replenish_enabled = false;
++              clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
++              clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+               atomic_set(&endpoint->replenish_saved,
+                          gsi_channel_tre_max(gsi, endpoint->channel_id));
+               atomic_set(&endpoint->replenish_backlog, 0);
+diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
+index 58a245de488e8..823c4a1296587 100644
+--- a/drivers/net/ipa/ipa_endpoint.h
++++ b/drivers/net/ipa/ipa_endpoint.h
+@@ -39,6 +39,19 @@ enum ipa_endpoint_name {
+ 
+ #define IPA_ENDPOINT_MAX              32      /* Max supported by driver */
+ 
++/**
++ * enum ipa_replenish_flag:   RX buffer replenish flags
++ *
++ * @IPA_REPLENISH_ENABLED:    Whether receive buffer replenishing is enabled
++ * @IPA_REPLENISH_ACTIVE:     Whether replenishing is underway
++ * @IPA_REPLENISH_COUNT:      Number of defined replenish flags
++ */
++enum ipa_replenish_flag {
++      IPA_REPLENISH_ENABLED,
++      IPA_REPLENISH_ACTIVE,
++      IPA_REPLENISH_COUNT,    /* Number of flags (must be last) */
++};
++
+ /**
+  * struct ipa_endpoint - IPA endpoint information
+  * @channel_id:       EP's GSI channel
+@@ -60,7 +73,7 @@ struct ipa_endpoint {
+       struct net_device *netdev;
+ 
+       /* Receive buffer replenishing for RX endpoints */
+-      bool replenish_enabled;
++      DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
+       u32 replenish_ready;
+       atomic_t replenish_saved;
+       atomic_t replenish_backlog;
+diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
+index 207e59e74935a..06d9f19ca142a 100644
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone)
+       if (tx_buf == NULL)
+               goto free_rx_urb;
+ 
+-      rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
++      rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+                                   GFP_KERNEL, &rx_urb->transfer_dma);
+       if (rx_buf == NULL)
+               goto free_tx_buf;
+@@ -146,7 +146,7 @@ error_nomem:
+ 
+ static void ipheth_free_urbs(struct ipheth_device *iphone)
+ {
+-      usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
++      usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
+                         iphone->rx_urb->transfer_dma);
+       usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
+                         iphone->tx_urb->transfer_dma);
+@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags)
+ 
+       usb_fill_bulk_urb(dev->rx_urb, udev,
+                         usb_rcvbulkpipe(udev, dev->bulk_in),
+-                        dev->rx_buf, IPHETH_BUF_SIZE,
++                        dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+                         ipheth_rcvbulk_callback,
+                         dev);
+       dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
+index 90da17c6da664..30708af975adc 100644
+--- a/drivers/pci/hotplug/pciehp_hpc.c
++++ b/drivers/pci/hotplug/pciehp_hpc.c
+@@ -642,6 +642,8 @@ read_status:
+        */
+       if (ctrl->power_fault_detected)
+               status &= ~PCI_EXP_SLTSTA_PFD;
++      else if (status & PCI_EXP_SLTSTA_PFD)
++              ctrl->power_fault_detected = true;
+ 
+       events |= status;
+       if (!events) {
+@@ -651,7 +653,7 @@ read_status:
+       }
+ 
+       if (status) {
+-              pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);
++              pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status);
+ 
+               /*
+                * In MSI mode, all event bits must be zero before the port
+@@ -725,8 +727,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
+       }
+ 
+       /* Check Power Fault Detected */
+-      if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
+-              ctrl->power_fault_detected = 1;
++      if (events & PCI_EXP_SLTSTA_PFD) {
+               ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
+               pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
+                                     PCI_EXP_SLTCTL_ATTN_IND_ON);
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 086b6bacbad17..18e014fa06480 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -366,9 +366,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+       if (fanotify_is_perm_event(event->mask))
+               FANOTIFY_PERM(event)->fd = fd;
+ 
+-      if (f)
+-              fd_install(fd, f);
+-
+       /* Event info records order is: dir fid + name, child fid */
+       if (fanotify_event_dir_fh_len(event)) {
+               info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+@@ -432,6 +429,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+               count -= ret;
+       }
+ 
++      if (f)
++              fd_install(fd, f);
++
+       return metadata.event_len;
+ 
+ out_close_fd:
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index 7361023f3fdd5..db4ecfaab8792 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
+ 
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+                       char *buf, size_t nbytes, enum psi_res res);
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
++void psi_trigger_destroy(struct psi_trigger *t);
+ 
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+                       poll_table *wait);
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index b95f3211566a2..17d74f62c1818 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -128,9 +128,6 @@ struct psi_trigger {
+        * events to one per window
+        */
+       u64 last_event_time;
+-
+-      /* Refcounting to prevent premature destruction */
+-      struct kref refcount;
+ };
+ 
+ struct psi_group {
+diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
+index 7f71b54c06c5f..69fba563c810e 100644
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -545,6 +545,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+ 
+       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ 
++      /*
++       * Release agent gets called with all capabilities,
++       * require capabilities to set release agent.
++       */
++      if ((of->file->f_cred->user_ns != &init_user_ns) ||
++          !capable(CAP_SYS_ADMIN))
++              return -EPERM;
++
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENODEV;
+@@ -958,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
+               /* Specifying two release agents is forbidden */
+               if (ctx->release_agent)
+                       return invalfc(fc, "release_agent respecified");
++              /*
++               * Release agent gets called with all capabilities,
++               * require capabilities to set release agent.
++               */
++              if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
++                      return invalfc(fc, "Setting release_agent not allowed");
+               ctx->release_agent = param->string;
+               param->string = NULL;
+               break;
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index a86857edaa571..4927289a91a97 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3601,6 +3601,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+       cgroup_get(cgrp);
+       cgroup_kn_unlock(of->kn);
+ 
++      /* Allow only one trigger per file descriptor */
++      if (of->priv) {
++              cgroup_put(cgrp);
++              return -EBUSY;
++      }
++
+       psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       new = psi_trigger_create(psi, buf, nbytes, res);
+       if (IS_ERR(new)) {
+@@ -3608,8 +3614,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+               return PTR_ERR(new);
+       }
+ 
+-      psi_trigger_replace(&of->priv, new);
+-
++      smp_store_release(&of->priv, new);
+       cgroup_put(cgrp);
+ 
+       return nbytes;
+@@ -3644,7 +3649,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
+ 
+ static void cgroup_pressure_release(struct kernfs_open_file *of)
+ {
+-      psi_trigger_replace(&of->priv, NULL);
++      psi_trigger_destroy(of->priv);
+ }
+ #endif /* CONFIG_PSI */
+ 
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 1999fcec45c71..7c7758a9e2c24 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1566,8 +1566,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+        * Make sure that subparts_cpus is a subset of cpus_allowed.
+        */
+       if (cs->nr_subparts_cpus) {
+-              cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
+-                             cs->cpus_allowed);
++              cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
+               cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+       }
+       spin_unlock_irq(&callback_lock);
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index d50a31ecedeec..b7f38f3ad42a2 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -1116,7 +1116,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+       t->event = 0;
+       t->last_event_time = 0;
+       init_waitqueue_head(&t->event_wait);
+-      kref_init(&t->refcount);
+ 
+       mutex_lock(&group->trigger_lock);
+ 
+@@ -1145,15 +1144,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+       return t;
+ }
+ 
+-static void psi_trigger_destroy(struct kref *ref)
++void psi_trigger_destroy(struct psi_trigger *t)
+ {
+-      struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
+-      struct psi_group *group = t->group;
++      struct psi_group *group;
+       struct task_struct *task_to_destroy = NULL;
+ 
+-      if (static_branch_likely(&psi_disabled))
++      /*
++       * We do not check psi_disabled since it might have been disabled after
++       * the trigger got created.
++       */
++      if (!t)
+               return;
+ 
++      group = t->group;
+       /*
+        * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+        * from under a polling process.
+@@ -1189,9 +1192,9 @@ static void psi_trigger_destroy(struct kref *ref)
+       mutex_unlock(&group->trigger_lock);
+ 
+       /*
+-       * Wait for both *trigger_ptr from psi_trigger_replace and
+-       * poll_task RCUs to complete their read-side critical sections
+-       * before destroying the trigger and optionally the poll_task
++       * Wait for psi_schedule_poll_work RCU to complete its read-side
++       * critical section before destroying the trigger and optionally the
++       * poll_task.
+        */
+       synchronize_rcu();
+       /*
+@@ -1208,18 +1211,6 @@ static void psi_trigger_destroy(struct kref *ref)
+       kfree(t);
+ }
+ 
+-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
+-{
+-      struct psi_trigger *old = *trigger_ptr;
+-
+-      if (static_branch_likely(&psi_disabled))
+-              return;
+-
+-      rcu_assign_pointer(*trigger_ptr, new);
+-      if (old)
+-              kref_put(&old->refcount, psi_trigger_destroy);
+-}
+-
+ __poll_t psi_trigger_poll(void **trigger_ptr,
+                               struct file *file, poll_table *wait)
+ {
+@@ -1229,24 +1220,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
+       if (static_branch_likely(&psi_disabled))
+               return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+ 
+-      rcu_read_lock();
+-
+-      t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
+-      if (!t) {
+-              rcu_read_unlock();
++      t = smp_load_acquire(trigger_ptr);
++      if (!t)
+               return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+-      }
+-      kref_get(&t->refcount);
+-
+-      rcu_read_unlock();
+ 
+       poll_wait(file, &t->event_wait, wait);
+ 
+       if (cmpxchg(&t->event, 1, 0) == 1)
+               ret |= EPOLLPRI;
+ 
+-      kref_put(&t->refcount, psi_trigger_destroy);
+-
+       return ret;
+ }
+ 
+@@ -1270,14 +1252,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ 
+       buf[buf_size - 1] = '\0';
+ 
+-      new = psi_trigger_create(&psi_system, buf, nbytes, res);
+-      if (IS_ERR(new))
+-              return PTR_ERR(new);
+-
+       seq = file->private_data;
++
+       /* Take seq->lock to protect seq->private from concurrent writes */
+       mutex_lock(&seq->lock);
+-      psi_trigger_replace(&seq->private, new);
++
++      /* Allow only one trigger per file descriptor */
++      if (seq->private) {
++              mutex_unlock(&seq->lock);
++              return -EBUSY;
++      }
++
++      new = psi_trigger_create(&psi_system, buf, nbytes, res);
++      if (IS_ERR(new)) {
++              mutex_unlock(&seq->lock);
++              return PTR_ERR(new);
++      }
++
++      smp_store_release(&seq->private, new);
+       mutex_unlock(&seq->lock);
+ 
+       return nbytes;
+@@ -1312,7 +1304,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
+ {
+       struct seq_file *seq = file->private_data;
+ 
+-      psi_trigger_replace(&seq->private, NULL);
++      psi_trigger_destroy(seq->private);
+       return single_release(inode, file);
+ }
+ 
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 27ffa83ffeb3c..373564bf57acb 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3238,8 +3238,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+       struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+       unsigned char name_assign_type = NET_NAME_USER;
+       struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+-      const struct rtnl_link_ops *m_ops = NULL;
+-      struct net_device *master_dev = NULL;
++      const struct rtnl_link_ops *m_ops;
++      struct net_device *master_dev;
+       struct net *net = sock_net(skb->sk);
+       const struct rtnl_link_ops *ops;
+       struct nlattr *tb[IFLA_MAX + 1];
+@@ -3277,6 +3277,8 @@ replay:
+       else
+               dev = NULL;
+ 
++      master_dev = NULL;
++      m_ops = NULL;
+       if (dev) {
+               master_dev = netdev_master_upper_dev_get(dev);
+               if (master_dev)
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 991e3434957b8..12dd08af12b5e 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1620,6 +1620,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+           (mss != tcp_skb_seglen(skb)))
+               goto out;
+ 
++      if (!tcp_skb_can_collapse(prev, skb))
++              goto out;
+       len = skb->len;
+       pcount = tcp_skb_pcount(skb);
+       if (tcp_skb_shift(prev, skb, pcount, len))
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 6ef035494f30d..a31334b92be7e 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1750,7 +1750,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
+               err = -ENOSPC;
+               if (refcount_read(&match->sk_ref) < match->max_num_members) {
+                       __dev_remove_pack(&po->prot_hook);
+-                      po->fanout = match;
++
++                      /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
++                      WRITE_ONCE(po->fanout, match);
++
+                       po->rollover = rollover;
+                       rollover = NULL;
+                      refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+@@ -3906,7 +3909,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+       }
+       case PACKET_FANOUT_DATA:
+       {
+-              if (!po->fanout)
++              /* Paired with the WRITE_ONCE() in fanout_add() */
++              if (!READ_ONCE(po->fanout))
+                       return -EINVAL;
+ 
+               return fanout_set_data(po, optval, optlen);
+diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
+index cb1331b357451..7993a692c7fda 100644
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -1954,9 +1954,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+       bool prio_allocate;
+       u32 parent;
+       u32 chain_index;
+-      struct Qdisc *q = NULL;
++      struct Qdisc *q;
+       struct tcf_chain_info chain_info;
+-      struct tcf_chain *chain = NULL;
++      struct tcf_chain *chain;
+       struct tcf_block *block;
+       struct tcf_proto *tp;
+       unsigned long cl;
+@@ -1984,6 +1984,8 @@ replay:
+       tp = NULL;
+       cl = 0;
+       block = NULL;
++      q = NULL;
++      chain = NULL;
+ 
+       if (prio == 0) {
+               /* If no priority is provided by the user,
+@@ -2804,8 +2806,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+       struct tcmsg *t;
+       u32 parent;
+       u32 chain_index;
+-      struct Qdisc *q = NULL;
+-      struct tcf_chain *chain = NULL;
++      struct Qdisc *q;
++      struct tcf_chain *chain;
+       struct tcf_block *block;
+       unsigned long cl;
+       int err;
+@@ -2815,6 +2817,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+               return -EPERM;
+ 
+ replay:
++      q = NULL;
+       err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+                                    rtm_tca_policy, extack);
+       if (err < 0)
