On Tue, Nov 25, 2025 at 06:58:37AM -0800, Tantilov, Emil S wrote:
> 
> 
> On 11/25/2025 5:42 AM, Simon Horman wrote:
> > On Thu, Nov 20, 2025 at 04:12:15PM -0800, Emil Tantilov wrote:
> > 
> > ...
> > 
> > > diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c 
> > > b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> > > index 2a53f3d504d2..5c81f52db266 100644
> > > --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
> > > +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> > > @@ -752,6 +752,65 @@ static int idpf_init_mac_addr(struct idpf_vport 
> > > *vport,
> > >           return 0;
> > >   }
> > > +static void idpf_detach_and_close(struct idpf_adapter *adapter)
> > > +{
> > > + int max_vports = adapter->max_vports;
> > > +
> > > + for (int i = 0; i < max_vports; i++) {
> > > +         struct net_device *netdev = adapter->netdevs[i];
> > > +
> > > +         /* If the interface is in detached state, that means the
> > > +          * previous reset was not handled successfully for this
> > > +          * vport.
> > > +          */
> > > +         if (!netif_device_present(netdev))
> > > +                 continue;
> > > +
> > > +         /* Hold RTNL to protect racing with callbacks */
> > > +         rtnl_lock();
> > > +         netif_device_detach(netdev);
> > > +         if (netif_running(netdev)) {
> > > +                 set_bit(IDPF_VPORT_UP_REQUESTED,
> > > +                         adapter->vport_config[i]->flags);
> > > +                 dev_close(netdev);
> > > +         }
> > > +         rtnl_unlock();
> > > + }
> > > +}
> > > +
> > > +static void idpf_attach_and_open(struct idpf_adapter *adapter)
> > > +{
> > > + int max_vports = adapter->max_vports;
> > > +
> > > + for (int i = 0; i < max_vports; i++) {
> > > +         struct idpf_vport *vport = adapter->vports[i];
> > > +         struct idpf_vport_config *vport_config;
> > > +         struct net_device *netdev;
> > > +
> > > +         /* In case of a critical error in the init task, the vport
> > > +          * will be freed. Only continue to restore the netdevs
> > > +          * if the vport is allocated.
> > > +          */
> > > +         if (!vport)
> > > +                 continue;
> > > +
> > > +         /* No need for RTNL on attach as this function is called
> > > +          * following detach and dev_close(). We do take RTNL for
> > > +          * dev_open() below as it can race with external callbacks
> > > +          * following the call to netif_device_attach().
> > > +          */
> > > +         netdev = adapter->netdevs[i];
> > > +         netif_device_attach(netdev);
> > > +         vport_config = adapter->vport_config[vport->idx];
> > > +         if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED,
> > > +                                vport_config->flags)) {
> > > +                 rtnl_lock();
> > > +                 dev_open(netdev, NULL);
> > > +                 rtnl_unlock();
> > > +         }
> > > + }
> > > +}
> > > +
> > >   /**
> > >    * idpf_cfg_netdev - Allocate, configure and register a netdev
> > >    * @vport: main vport structure
> > 
> > ...
> > 
> > > @@ -1807,27 +1860,6 @@ static int idpf_check_reset_complete(struct 
> > > idpf_hw *hw,
> > >           return -EBUSY;
> > >   }
> > > -/**
> > > - * idpf_set_vport_state - Set the vport state to be after the reset
> > > - * @adapter: Driver specific private structure
> > > - */
> > > -static void idpf_set_vport_state(struct idpf_adapter *adapter)
> > > -{
> > > - u16 i;
> > > -
> > > - for (i = 0; i < adapter->max_vports; i++) {
> > > -         struct idpf_netdev_priv *np;
> > > -
> > > -         if (!adapter->netdevs[i])
> > > -                 continue;
> > > -
> > > -         np = netdev_priv(adapter->netdevs[i]);
> > > -         if (np->state == __IDPF_VPORT_UP)
> > > -                 set_bit(IDPF_VPORT_UP_REQUESTED,
> > > -                         adapter->vport_config[i]->flags);
> > > - }
> > > -}
> > > -
> > >   /**
> > >    * idpf_init_hard_reset - Initiate a hardware reset
> > >    * @adapter: Driver specific private structure
> > 
> > > @@ -1836,28 +1868,17 @@ static void idpf_set_vport_state(struct 
> > > idpf_adapter *adapter)
> > >    * reallocate. Also reinitialize the mailbox. Return 0 on success,
> > >    * negative on failure.
> > >    */
> > > -static int idpf_init_hard_reset(struct idpf_adapter *adapter)
> > > +static void idpf_init_hard_reset(struct idpf_adapter *adapter)
> > >   {
> > >           struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops;
> > >           struct device *dev = &adapter->pdev->dev;
> > > - struct net_device *netdev;
> > >           int err;
> > > - u16 i;
> > > + idpf_detach_and_close(adapter);
> > >           mutex_lock(&adapter->vport_ctrl_lock);
> > >           dev_info(dev, "Device HW Reset initiated\n");
> > > - /* Avoid TX hangs on reset */
> > > - for (i = 0; i < adapter->max_vports; i++) {
> > > -         netdev = adapter->netdevs[i];
> > > -         if (!netdev)
> > > -                 continue;
> > 
> > Hi Emil,
> > 
> > In this code that is removed there is a check for !netdev.
> > And also there is a similar check in idpf_set_vport_state().
> > But there is no such check in idpf_detach_and_close().
> > Is this intentional?
> 
> This logic is a bit confusing because the reset path is executed on both
> driver load and a reset (since the initialization is identical it makes
> sense to re-use the code). This is what roughly happens on load and
> reset:
> 
> driver load -> reset -> configure vports -> create netdevs
> reset -> de-allocate vports -> re-allocate vports
> 
> The first patch:
> https://lore.kernel.org/intel-wired-lan/[email protected]/
> 
> makes sure that we never lose the netdev on a reset, following a
> successful driver load. Previously this could happen in the error path.
> In other words during a reset there is no need to check for a netdev as
> this is guaranteed, but we must make sure that vports are present as
> those can be freed.
> 
> The 5th patch:
> https://lore.kernel.org/intel-wired-lan/[email protected]/
> 
> fixes another instance where we could fail in the reset error path by
> ensuring the service task, which handles resets is cancelled as at
> that point we have neither vports, nor netdevs, hence nothing to
> "serve". Hope this makes sense, but the gist of it is that with this
> series applied the reset can be protected by just making sure that
> the vports are allocated. If for whatever reason netdevs happen to
> be NULL, following this series it would be a bug introduced somewhere
> else in the code that will have to be addressed.

I did spend a bit of time trying to figure out the flow,
but not entirely successfully. Thanks for setting me straight.

...

Reply via email to