Use the fault-injection framework to trigger punit_error_handler()
for testing.

Usage:
  echo 100 > .../inject_punit_error/probability
  echo 1   > .../inject_punit_error/times

Signed-off-by: Mallesh Koujalagi <[email protected]>
---
 drivers/gpu/drm/xe/xe_debugfs.c |  3 +++
 drivers/gpu/drm/xe/xe_ras.c     | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 22b471303984..3a90deb5596c 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -40,6 +40,7 @@
 
 DECLARE_FAULT_ATTR(gt_reset_failure);
 DECLARE_FAULT_ATTR(inject_csc_hw_error);
+DECLARE_FAULT_ATTR(inject_punit_error);
 
 static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
                                    u32 offset, const char *name, struct drm_printer *p)
@@ -612,6 +613,8 @@ void xe_debugfs_register(struct xe_device *xe)
 
        fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
 
+       fault_create_debugfs_attr("inject_punit_error", root, &inject_punit_error);
+
        if (IS_SRIOV_PF(xe))
                xe_sriov_pf_debugfs_register(xe, root);
        else if (IS_SRIOV_VF(xe))
diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 5a2fee5a1308..bb8502472f73 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -3,6 +3,8 @@
  * Copyright © 2026 Intel Corporation
  */
 
+#include <linux/fault-inject.h>
+
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -690,6 +692,13 @@ void xe_ras_init(struct xe_device *xe)
                xe_drm_ras_init(xe);
 }
 
+extern struct fault_attr inject_punit_error;
+
+static bool fault_inject_punit_error(void)
+{
+       return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_punit_error, 1);
+}
+
 /**
  * xe_ras_process_errors() - Process and contain hardware errors
  * @xe: xe device instance
@@ -708,6 +717,11 @@ enum xe_ras_recovery_action xe_ras_process_errors(struct xe_device *xe)
        size_t rlen;
        int ret;
 
+       if (fault_inject_punit_error()) {
+               punit_error_handler(xe);
+               return XE_RAS_RECOVERY_ACTION_DISCONNECT;
+       }
+
        if (!xe->info.has_sysctrl)
                return XE_RAS_RECOVERY_ACTION_RESET;
 
-- 
2.34.1

Reply via email to