Public

Series is:
Reviewed-by: Alex Deucher <[email protected]>
________________________________
From: Sunil Khatri <[email protected]>
Sent: Tuesday, June 2, 2026 2:24 PM
To: Deucher, Alexander <[email protected]>; Koenig, Christian 
<[email protected]>
Cc: [email protected] <[email protected]>; Khatri, 
Sunil <[email protected]>
Subject: [PATCH v2 1/3] drm/amdgpu: validate the mes firmware version for gfx11

The MES firmware should report the same version whether it is read
from the register or from the firmware ucode binary. That is not
always the case for MES firmware, so add a warning for the case
where the two versions do not match.

Signed-off-by: Sunil Khatri <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  |  1 +
 3 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index c9467b26e42c..e3972673fd64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -781,6 +781,18 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, 
int pipe)
         return r;
 }

+void amdgpu_mes_validate_fw_version(struct amdgpu_device *adev)
+{
+       u32 fw_from_ucode = adev->mes.fw_version[AMDGPU_MES_SCHED_PIPE];
+       u32 fw_from_reg = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+
+       if (fw_from_ucode != fw_from_reg)
+               dev_info(adev->dev,
+                        "MES firmware reports incorrect version in ucode 
binary (0x%x vs 0x%x)\n",
+                        fw_from_ucode, fw_from_reg);
+}
+
+
 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
 {
         uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 93990d4990f2..fdd06a17520a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -441,6 +441,7 @@ struct amdgpu_mes_funcs {
         (adev)->mes.kiq_hw_fini((adev), (xcc_id))

 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
+void amdgpu_mes_validate_fw_version(struct amdgpu_device *adev);
 int amdgpu_mes_init(struct amdgpu_device *adev);
 void amdgpu_mes_fini(struct amdgpu_device *adev);

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index a926a330700e..0db378d126fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1686,6 +1686,7 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block 
*ip_block)
         if (r)
                 goto failure;

+       amdgpu_mes_validate_fw_version(adev);
 out:
         /*
          * Disable KIQ ring usage from the driver once MES is enabled.
--
2.34.1

Reply via email to