Introduce the DRM_WEDGE_RECOVERY_COLD_RESET (BIT(4)) recovery method to
handle critical errors that require a complete device power cycle.

This method addresses scenarios where the other, less disruptive
recovery mechanisms (driver reload, PCIe reset, etc.) are insufficient
to restore device functionality. When set, it indicates to userspace
that only a full cold reset can recover the device from its current
error state. The cold reset method serves as a last resort for critical
errors.

Signed-off-by: Mallesh Koujalagi <[email protected]>
---
 drivers/gpu/drm/drm_drv.c | 2 ++
 include/drm/drm_device.h  | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 2915118436ce..48d269d470a3 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -534,6 +534,8 @@ static const char *drm_get_wedge_recovery(unsigned int opt)
                return "bus-reset";
        case DRM_WEDGE_RECOVERY_VENDOR:
                return "vendor-specific";
+       case DRM_WEDGE_RECOVERY_COLD_RESET:
+               return "cold-reset";
        default:
                return NULL;
        }
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index bc78fb77cc27..3e386eb42023 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -37,6 +37,7 @@ struct pci_controller;
 #define DRM_WEDGE_RECOVERY_REBIND      BIT(1)  /* unbind + bind driver */
 #define DRM_WEDGE_RECOVERY_BUS_RESET   BIT(2)  /* unbind + reset bus device + bind */
 #define DRM_WEDGE_RECOVERY_VENDOR      BIT(3)  /* vendor specific recovery method */
+#define DRM_WEDGE_RECOVERY_COLD_RESET  BIT(4)  /* full device cold reset */
 
 /**
  * struct drm_wedge_task_info - information about the guilty task of a wedge dev
-- 
2.34.1

Reply via email to