Mon, Jul 15, 2024 at 09:11:48PM CEST, [email protected] wrote:
>Add support for reporting fw status via the devlink health report.
>
>Example:
> # devlink health show pci/0000:02:00.0 reporter fw
> pci/0000:02:00.0:
>   reporter fw
>     state healthy error 0 recover 0
> # devlink health diagnose pci/0000:02:00.0 reporter fw
> Status: normal
>
>Signed-off-by: Kamal Heib <[email protected]>
>---
> drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
> .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
> .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
> drivers/net/ethernet/intel/i40e/i40e_main.c   | 15 +++++
> 4 files changed, 75 insertions(+)
>
>diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
>b/drivers/net/ethernet/intel/i40e/i40e.h
>index d546567e0286..f94671b6e7c6 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e.h
>+++ b/drivers/net/ethernet/intel/i40e/i40e.h
>@@ -465,6 +465,7 @@ static inline const u8 *i40e_channel_mac(struct 
>i40e_channel *ch)
> struct i40e_pf {
>       struct pci_dev *pdev;
>       struct devlink_port devlink_port;
>+      struct devlink_health_reporter *fw_health_report;
>       struct i40e_hw hw;
>       DECLARE_BITMAP(state, __I40E_STATE_SIZE__);
>       struct msix_entry *msix_entries;
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c 
>b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
>index cc4e9e2addb7..ad91c150cdba 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c
>+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
>@@ -122,6 +122,25 @@ static int i40e_devlink_info_get(struct devlink *dl,
>       return err;
> }
> 
>+static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
>+                                   struct devlink_fmsg *fmsg,
>+                                   struct netlink_ext_ack *extack)
>+{
>+      struct i40e_pf *pf = devlink_health_reporter_priv(reporter);
>+
>+      if (test_bit(__I40E_RECOVERY_MODE, pf->state))
>+              devlink_fmsg_string_pair_put(fmsg, "Status", "recovery");

Is this a "Status" or a "Mode"?


>+      else
>+              devlink_fmsg_string_pair_put(fmsg, "Status", "normal");
>+
>+      return 0;
>+}
>+
>+static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = {
>+      .name = "fw",
>+      .diagnose = i40e_fw_reporter_diagnose,
>+};
>+
> static const struct devlink_ops i40e_devlink_ops = {
>       .info_get = i40e_devlink_info_get,
> };
>@@ -233,3 +252,41 @@ void i40e_devlink_destroy_port(struct i40e_pf *pf)
> {
>       devlink_port_unregister(&pf->devlink_port);
> }
>+
>+/**
>+ * i40e_devlink_create_health_reporter - Create the health reporter for this 
>PF
>+ * @pf: the PF to create reporter for
>+ *
>+ * Create health reporter for this PF.
>+ *
>+ * Return: zero on success or an error code on failure.
>+ **/
>+int i40e_devlink_create_health_reporter(struct i40e_pf *pf)
>+{
>+      struct devlink *devlink = priv_to_devlink(pf);
>+      struct device *dev = &pf->pdev->dev;
>+      int rc = 0;
>+
>+      devl_lock(devlink);
>+      pf->fw_health_report =
>+              devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, 
>pf);
>+      if (IS_ERR(pf->fw_health_report)) {
>+              rc = PTR_ERR(pf->fw_health_report);
>+              dev_err(dev, "Failed to create fw reporter, err = %d\n", rc);
>+      }
>+      devl_unlock(devlink);
>+
>+      return rc;
>+}
>+
>+/**
>+ * i40e_devlink_destroy_health_reporter - Destroy the health reporter
>+ * @pf: the PF to cleanup
>+ *
>+ * Destroy the health reporter
>+ **/
>+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf)
>+{
>+      if (!IS_ERR_OR_NULL(pf->fw_health_report))
>+              devlink_health_reporter_destroy(pf->fw_health_report);
>+}
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h 
>b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
>index 469fb3d2ee25..018679094bb5 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h
>+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
>@@ -14,5 +14,7 @@ void i40e_devlink_register(struct i40e_pf *pf);
> void i40e_devlink_unregister(struct i40e_pf *pf);
> int i40e_devlink_create_port(struct i40e_pf *pf);
> void i40e_devlink_destroy_port(struct i40e_pf *pf);
>+int i40e_devlink_create_health_reporter(struct i40e_pf *pf);
>+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf);
> 
> #endif /* _I40E_DEVLINK_H_ */
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
>b/drivers/net/ethernet/intel/i40e/i40e_main.c
>index cbcfada7b357..13cad5f58029 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
>+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
>@@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf 
>*pf)
>               dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. 
> Limiting functionality.\n");
>               dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet 
> Adapters and Devices User Guide for details on firmware recovery mode.\n");
>               set_bit(__I40E_RECOVERY_MODE, pf->state);
>+              if (pf->fw_health_report)
>+                      devlink_health_report(pf->fw_health_report,
>+                                            "FW recovery mode detected", pf);

You report it on the "FW" reporter. Why is "FW" needed in the message?


> 
>               return true;
>       }
>@@ -15636,6 +15639,14 @@ static int i40e_probe(struct pci_dev *pdev, const 
>struct pci_device_id *ent)
>               err = -ENOMEM;
>               goto err_pf_alloc;
>       }
>+
>+      err = i40e_devlink_create_health_reporter(pf);
>+      if (err) {
>+              dev_err(&pdev->dev,
>+                      "Failed to create health reporter %d\n", err);
>+              goto err_health_reporter;
>+      }
>+
>       pf->next_vsi = 0;
>       pf->pdev = pdev;
>       set_bit(__I40E_DOWN, pf->state);
>@@ -16180,6 +16191,8 @@ static int i40e_probe(struct pci_dev *pdev, const 
>struct pci_device_id *ent)
> err_pf_reset:
>       iounmap(hw->hw_addr);
> err_ioremap:
>+      i40e_devlink_destroy_health_reporter(pf);
>+err_health_reporter:
>       i40e_free_pf(pf);
> err_pf_alloc:
>       pci_release_mem_regions(pdev);
>@@ -16209,6 +16222,8 @@ static void i40e_remove(struct pci_dev *pdev)
> 
>       i40e_devlink_unregister(pf);
> 
>+      i40e_devlink_destroy_health_reporter(pf);
>+
>       i40e_dbg_pf_exit(pf);
> 
>       i40e_ptp_stop(pf);
>-- 
>2.45.2
>
>

Reply via email to