From: Dafna Hirschfeld <dhirschf...@habana.ai>

Since hw_fini returns an error code to indicate failure, we should
check its return value. Currently it can only fail upon soft-reset
from hl_device_reset. A later patch will add an hw_fini failure in
case of a polling timeout in hard-reset.

Signed-off-by: Dafna Hirschfeld <dhirschf...@habana.ai>
Reviewed-by: Oded Gabbay <ogab...@kernel.org>
Signed-off-by: Oded Gabbay <ogab...@kernel.org>
---
 drivers/accel/habanalabs/common/device.c | 12 +++++++++---
 drivers/accel/habanalabs/gaudi/gaudi.c   |  7 ++++++-
 drivers/accel/habanalabs/gaudi2/gaudi2.c |  7 ++++++-
 drivers/accel/habanalabs/goya/goya.c     |  7 ++++++-
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/habanalabs/common/device.c 
b/drivers/accel/habanalabs/common/device.c
index 7ade32487138..99e793dfb126 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1472,7 +1472,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
                schedule_hard_reset = false, delay_reset, from_dev_release, 
from_watchdog_thread;
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        struct hl_ctx *ctx;
-       int i, rc;
+       int i, rc, hw_fini_rc;
 
        if (!hdev->init_done) {
                dev_err(hdev->dev, "Can't reset before initialization is 
done\n");
@@ -1634,7 +1634,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
        }
 
        /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
+       hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
 
        if (hard_reset) {
                hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
@@ -1661,6 +1661,10 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
                hl_ctx_put(ctx);
        }
 
+       if (hw_fini_rc) {
+               rc = hw_fini_rc;
+               goto out_err;
+       }
        /* Finished tear-down, starting to re-initialize */
 
        if (hard_reset) {
@@ -2416,7 +2420,9 @@ void hl_device_fini(struct hl_device *hdev)
        hl_cb_pool_fini(hdev);
 
        /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, true, false);
+       rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+       if (rc)
+               dev_err(hdev->dev, "hw_fini failed in device fini while 
removing device %d\n", rc);
 
        hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
 
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c 
b/drivers/accel/habanalabs/gaudi/gaudi.c
index 26287084a9e0..60146fd4de6b 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -868,13 +868,18 @@ static int gaudi_early_init(struct hl_device *hdev)
        rc = hl_fw_read_preboot_status(hdev);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
+                       /* we are already on failure flow, so don't check if 
hw_fini fails. */
                        hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_dbg(hdev->dev, "H/W state is dirty, must reset before 
initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true, false);
+               rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+               if (rc) {
+                       dev_err(hdev->dev, "failed to reset HW in dirty state 
(%d)\n", rc);
+                       goto pci_fini;
+               }
        }
 
        return 0;
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c 
b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 82448edfdfa0..f01fa4bca381 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -2886,13 +2886,18 @@ static int gaudi2_early_init(struct hl_device *hdev)
        rc = hl_fw_read_preboot_status(hdev);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
+                       /* we are already on failure flow, so don't check if 
hw_fini fails. */
                        hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_dbg(hdev->dev, "H/W state is dirty, must reset before 
initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true, false);
+               rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+               if (rc) {
+                       dev_err(hdev->dev, "failed to reset HW during early 
init (%d)\n", rc);
+                       goto pci_fini;
+               }
        }
 
        return 0;
diff --git a/drivers/accel/habanalabs/goya/goya.c 
b/drivers/accel/habanalabs/goya/goya.c
index 7a45ab3ca43a..39f9e5de1f4c 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -669,13 +669,18 @@ static int goya_early_init(struct hl_device *hdev)
        rc = hl_fw_read_preboot_status(hdev);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
+                       /* we are already on failure flow, so don't check if 
hw_fini fails. */
                        hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_dbg(hdev->dev, "H/W state is dirty, must reset before 
initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true, false);
+               rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+               if (rc) {
+                       dev_err(hdev->dev, "failed to reset HW in dirty state 
(%d)\n", rc);
+                       goto pci_fini;
+               }
        }
 
        if (!hdev->pldm) {
-- 
2.39.2

Reply via email to