On 5/5/26 13:20, Hamza Mahfooz wrote:
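The DMU already has robust hung-state tracking, but timeout recovery was
never hooked up. Hook it up now: when a DMU timeout is reported, attempt
to re-initialize the DMUB firmware, and if that fails, send a DRM wedged
event so that userspace can recover the device.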

Reviewed-by: Leo Li <[email protected]>
Signed-off-by: Hamza Mahfooz <[email protected]>
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c   | 13 ++++++++-----
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h   |  1 +
  .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c   | 12 ++++++++++--
  3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e96a12ff2d31..763da9a9032d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1246,7 +1246,7 @@ static  void amdgpu_dm_audio_eld_notify(struct 
amdgpu_device *adev, int pin)
        }
  }

-static int dm_dmub_hw_init(struct amdgpu_device *adev)
+int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev)
  {
        const struct dmcub_firmware_header_v1_0 *hdr;
        struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
@@ -1315,7 +1315,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
        /* if adev->firmware.load_type == AMDGPU_FW_LOAD_PSP,
         * amdgpu_ucode_init_single_fw will load dmub firmware
         * fw_inst_const part to cw0; otherwise, the firmware back door load
-        * will be done by dm_dmub_hw_init
+        * will be done by amdgpu_dm_dmub_hw_init().
         */
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
                memcpy(fb_info->fb[DMUB_WINDOW_0_INST_CONST].cpu_addr, 
fw_inst_const,
@@ -1457,7 +1457,7 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev)
                        drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: 
%d\n", status);
        } else {
                /* Perform the full hardware initialization. */
-               r = dm_dmub_hw_init(adev);
+               r = amdgpu_dm_dmub_hw_init(adev);
                if (r)
                        drm_err(adev_to_drm(adev), "DMUB interface failed to 
initialize: status=%d\n", r);
        }
@@ -2041,6 +2041,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                goto error;
        }

+       adev->dm.dc->debug.enable_dmu_recovery =
+               amdgpu_device_should_recover_gpu(adev);
+
        if (amdgpu_dc_debug_mask & DC_DISABLE_PIPE_SPLIT) {
                adev->dm.dc->debug.force_single_disp_pipe_split = false;
                adev->dm.dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
@@ -2090,7 +2093,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
        if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
                drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n");

-       r = dm_dmub_hw_init(adev);
+       r = amdgpu_dm_dmub_hw_init(adev);
        if (r) {
                drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: 
status=%d\n", r);
                goto error;
@@ -3604,7 +3607,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
                 */
                link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);

-               r = dm_dmub_hw_init(adev);
+               r = amdgpu_dm_dmub_hw_init(adev);
                if (r) {
                        drm_err(adev_to_drm(adev), "DMUB interface failed to 
initialize: status=%d\n", r);
                        return r;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 74a8fe1a1999..dc808ee83c2a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -1086,6 +1086,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device 
*adev,
  #define MAX_COLOR_LEGACY_LUT_ENTRIES 256

  void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev);
  int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
  int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
  int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 3b8ae7798a93..8f10117483e2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -33,6 +33,7 @@
  #include <drm/drm_atomic.h>
  #include <drm/drm_probe_helper.h>
  #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
  #include <drm/drm_edid.h>
  #include <drm/drm_fixed.h>
@@ -1165,8 +1166,15 @@ void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)

  void dm_helpers_dmu_timeout(struct dc_context *ctx)
  {
-       // TODO:
-       //amdgpu_device_gpu_recover(dc_context->driver-context, NULL);
+       struct amdgpu_device *adev = ctx->driver_context;
+
+       lockdep_assert_held(&adev->dm.dc_lock);
+
+       drm_info(adev_to_drm(adev), "attempting firmware reset\n");
+       if (amdgpu_dm_dmub_hw_init(adev))
+               drm_dev_wedged_event(adev_to_drm(adev),
+                                    DRM_WEDGE_RECOVERY_REBIND |
+                                    DRM_WEDGE_RECOVERY_BUS_RESET, NULL);
  }

  void dm_helpers_smu_timeout(struct dc_context *ctx, unsigned int msg_id, unsigned int param, unsigned int timeout_us)

Reply via email to