Runtime PM might be left in an error state if one of its callbacks
returned an error, e.g. if the (auto)suspend callback failed following
a firmware crash.

When that happens, any further attempt to acquire or release a power
reference will then also fail, making it impossible to do anything else
with the GPU. The driver logic will eventually reach the reset code.

In pvr_power_reset(), replace pvr_power_get() with a new API
pvr_power_get_clear() which also attempts to clear any runtime PM error
state if acquiring a power reference is not possible.

Signed-off-by: Alessio Belle <[email protected]>
---
 drivers/gpu/drm/imagination/pvr_power.c | 59 ++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index 41f5d89e78b854cf6993838868a4416a220b490a..65642ded051db83e82e32e3c3e9f82508ad8d4cc 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -340,6 +340,63 @@ pvr_power_device_idle(struct device *dev)
        return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
 }
 
+static int
+pvr_power_clear_error(struct pvr_device *pvr_dev)
+{
+       struct device *dev = from_pvr_device(pvr_dev)->dev;
+       int err;
+
+       /* Ensure the device state is known and nothing is happening past this point */
+       pm_runtime_disable(dev);
+
+       /* Attempt to clear the runtime PM error by setting the current state again */
+       if (pm_runtime_status_suspended(dev))
+               err = pm_runtime_set_suspended(dev);
+       else
+               err = pm_runtime_set_active(dev);
+
+       if (err) {
+               drm_err(from_pvr_device(pvr_dev),
+                       "%s: Failed to clear runtime PM error (new error %d)\n",
+                       __func__, err);
+       }
+
+       pm_runtime_enable(dev);
+
+       return err;
+}
+
+/**
+ * pvr_power_get_clear() - Acquire a power reference, correcting any errors
+ * @pvr_dev: Device pointer
+ *
+ * Attempt to acquire a power reference on the device. If that fails, attempt
+ * to clear any runtime PM error state and retry the acquisition once.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_power_get() or the runtime PM API.
+ */
+static int
+pvr_power_get_clear(struct pvr_device *pvr_dev)
+{
+       int err;
+
+       err = pvr_power_get(pvr_dev);
+       if (err == 0)
+               return err;
+
+       drm_warn(from_pvr_device(pvr_dev),
+                "%s: pvr_power_get returned error %d, attempting recovery\n",
+                __func__, err);
+
+       err = pvr_power_clear_error(pvr_dev);
+       if (err)
+               return err;
+
+       return pvr_power_get(pvr_dev);
+}
+
 /**
  * pvr_power_reset() - Reset the GPU
  * @pvr_dev: Device pointer
@@ -364,7 +421,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
         * Take a power reference during the reset. This should prevent any interference with the
         * power state during reset.
         */
-       WARN_ON(pvr_power_get(pvr_dev));
+       WARN_ON(pvr_power_get_clear(pvr_dev));
 
        down_write(&pvr_dev->reset_sem);
 

---
base-commit: 1a45ef022f0364186d4fb2f4e5255dcae1ff638a
change-id: 20250619-clear-rpm-errors-gpu-reset-359ecbc85689

Best regards,
-- 
Alessio Belle <[email protected]>

Reply via email to