Use the fault-injection framework to trigger punit_error_handler() for testing.

Usage:
  echo 100 > .../inject_punit_error/probability
  echo 1 > .../inject_punit_error/times
Signed-off-by: Mallesh Koujalagi <[email protected]> --- drivers/gpu/drm/xe/xe_debugfs.c | 3 +++ drivers/gpu/drm/xe/xe_ras.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 22b471303984..3a90deb5596c 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -40,6 +40,7 @@ DECLARE_FAULT_ATTR(gt_reset_failure); DECLARE_FAULT_ATTR(inject_csc_hw_error); +DECLARE_FAULT_ATTR(inject_punit_error); static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, u32 offset, const char *name, struct drm_printer *p) @@ -612,6 +613,8 @@ void xe_debugfs_register(struct xe_device *xe) fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); + fault_create_debugfs_attr("inject_punit_error", root, &inject_punit_error); + if (IS_SRIOV_PF(xe)) xe_sriov_pf_debugfs_register(xe, root); else if (IS_SRIOV_VF(xe)) diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c index 5a2fee5a1308..bb8502472f73 100644 --- a/drivers/gpu/drm/xe/xe_ras.c +++ b/drivers/gpu/drm/xe/xe_ras.c @@ -3,6 +3,8 @@ * Copyright © 2026 Intel Corporation */ +#include <linux/fault-inject.h> + #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" @@ -690,6 +692,13 @@ void xe_ras_init(struct xe_device *xe) xe_drm_ras_init(xe); } +extern struct fault_attr inject_punit_error; + +static bool fault_inject_punit_error(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_punit_error, 1); +} + /** * xe_ras_process_errors() - Process and contain hardware errors * @xe: xe device instance @@ -708,6 +717,11 @@ enum xe_ras_recovery_action xe_ras_process_errors(struct xe_device *xe) size_t rlen; int ret; + if (fault_inject_punit_error()) { + punit_error_handler(xe); + return XE_RAS_RECOVERY_ACTION_DISCONNECT; + } + if (!xe->info.has_sysctrl) return XE_RAS_RECOVERY_ACTION_RESET; -- 2.34.1
