From: Ahmed Zaki <[email protected]>

If a reset event is received from the PF early in the init cycle, the
state machine hangs for about 25 seconds.

Reproducer:
  echo 1 > /sys/class/net/$PF0/device/sriov_numvfs
  ip link set dev $PF0 vf 0 mac $NEW_MAC

The log shows:
  [792.620416] ice 0000:5e:00.0: Enabling 1 VFs
  [792.738812] iavf 0000:5e:01.0: enabling device (0000 -> 0002)
  [792.744182] ice 0000:5e:00.0: Enabling 1 VFs with 17 vectors and 16 queues 
per VF
  [792.839964] ice 0000:5e:00.0: Setting MAC 52:54:00:00:00:11 on VF 0. VF 
driver will be reinitialized
  [813.389684] iavf 0000:5e:01.0: Failed to communicate with PF; waiting before 
retry
  [818.635918] iavf 0000:5e:01.0: Hardware came out of reset. Attempting reinit.
  [818.766273] iavf 0000:5e:01.0: Multiqueue Enabled: Queue pair count = 16

Fix it by scheduling the reset task and making the reset task capable of
resetting early in the init cycle.

Fixes: ef8693eb90ae3 ("i40evf: refactor reset handling")
Signed-off-by: Ahmed Zaki <[email protected]>
Tested-by: Przemek Kitszel <[email protected]>
Reviewed-by: Przemek Kitszel <[email protected]>
Signed-off-by: Marcin Szycik <[email protected]>
---
This should be applied after "iavf: get rid of the crit lock"
---
 drivers/net/ethernet/intel/iavf/iavf_main.c     | 11 +++++++++++
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 17 +++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 2c0bb41809a4..81d7249d1149 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work)
        }
 
 continue_reset:
+       /* If we are still early in the state machine, just restart. */
+       if (adapter->state <= __IAVF_INIT_FAILED) {
+               iavf_shutdown_adminq(hw);
+               iavf_change_state(adapter, __IAVF_STARTUP);
+               iavf_startup(adapter);
+               queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+                                  msecs_to_jiffies(30));
+               netdev_unlock(netdev);
+               return;
+       }
+
        /* We don't use netif_running() because it may be true prior to
         * ndo_open() returning, so we can't assume it means all our open
         * tasks have finished, since we're not holding the rtnl_lock here.
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index a6f0e5990be2..07f0d0a0f1e2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event,
                        return iavf_status_to_errno(status);
                received_op =
                    (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high);
+
+               if (received_op == VIRTCHNL_OP_EVENT) {
+                       struct iavf_adapter *adapter = hw->back;
+                       struct virtchnl_pf_event *vpe =
+                               (struct virtchnl_pf_event *)event->msg_buf;
+
+                       if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING)
+                               continue;
+
+                       dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
+                       if (!(adapter->flags & IAVF_FLAG_RESET_PENDING))
+                               iavf_schedule_reset(adapter,
+                                                   IAVF_FLAG_RESET_PENDING);
+
+                       return -EIO;
+               }
+
                if (op_to_poll == received_op)
                        break;
        }
-- 
2.49.0

Reply via email to