RE: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu v13.0.6

2024-02-07 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

Thanks, I will correct it before submitting.

Best Regards,
Kevin

-Original Message-
From: Zhang, Hawking 
Sent: Wednesday, February 7, 2024 10:00 PM
To: Wang, Yang(Kevin) ; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao ; Lazar, Lijo 
Subject: RE: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu 
v13.0.6

[AMD Official Use Only - General]

With a nitpick below, the series is

Reviewed-by: Hawking Zhang 

+   MSG_MAP(BadPageThreshold,
PPSMC_MSG_RmaDueToBadPageThreshold,0),

It might be better to name it RmaDueToBadPageThreshold /
SMU_MSG_RmaDueToBadPageThreshold.

Regards,
Hawking

-Original Message-
From: Wang, Yang(Kevin) 
Sent: Wednesday, February 7, 2024 21:54
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; 
Lazar, Lijo ; Wang, Yang(Kevin) 
Subject: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu 
v13.0.6

implement smu send rma reason function for smu v13.0.6

Signed-off-by: Yang Wang 
Reviewed-by: Tao Zhou 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 15 ++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  6 ++
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h  |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 20 +++
 7 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 6627ee07d52d..f84bfed50681 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -693,6 +693,21 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct 
amdgpu_device *adev, uint32_t si
return ret;
 }

+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) {
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   int ret;
+
+   if (!is_support_sw_smu(adev))
+   return -EOPNOTSUPP;
+
+   mutex_lock(&adev->pm.mutex);
+   ret = smu_send_rma_reason(smu);
+   mutex_unlock(&adev->pm.mutex);
+
+   return ret;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
  enum pp_clock_type type,
  uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 3047ffe7f244..621200e0823f 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -450,6 +450,7 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
 int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev);
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
   enum pp_clock_type type,
   uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 0ad947df777a..138dcb8724b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3669,3 +3669,13 @@ int smu_send_hbm_bad_channel_flag(struct smu_context 
*smu, uint32_t size)

return ret;
 }
+
+int smu_send_rma_reason(struct smu_context *smu) {
+   int ret = 0;
+
+   if (smu->ppt_funcs && smu->ppt_funcs->send_rma_reason)
+   ret = smu->ppt_funcs->send_rma_reason(smu);
+
+   return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 66e84defd0b6..a870bdd49a4e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1341,6 +1341,11 @@ struct pptable_funcs {
 */
int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size);

+   /**
+* @send_rma_reason: message rma reason event to SMU.
+*/
+   int (*send_rma_reason)(struct smu_context *smu);
+
/**
 * @get_ecc_table:  message SMU to get ECC INFO table.
 */
@@ -1588,5 +1593,6 @@ int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
 int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
+int smu_send_rma_reason(struct smu_context *smu);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 5

RE: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu v13.0.6

2024-02-07 Thread Zhang, Hawking
[AMD Official Use Only - General]

With a nitpick below, the series is

Reviewed-by: Hawking Zhang 

+   MSG_MAP(BadPageThreshold,
PPSMC_MSG_RmaDueToBadPageThreshold,0),

It might be better to name it RmaDueToBadPageThreshold /
SMU_MSG_RmaDueToBadPageThreshold.

Regards,
Hawking

-Original Message-
From: Wang, Yang(Kevin) 
Sent: Wednesday, February 7, 2024 21:54
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking ; Zhou1, Tao ; 
Lazar, Lijo ; Wang, Yang(Kevin) 
Subject: [PATCH v2 1/2] drm/amdgpu: implement smu send rma reason for smu 
v13.0.6

implement smu send rma reason function for smu v13.0.6

Signed-off-by: Yang Wang 
Reviewed-by: Tao Zhou 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 15 ++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  6 ++
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h  |  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 ++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 20 +++
 7 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 6627ee07d52d..f84bfed50681 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -693,6 +693,21 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct 
amdgpu_device *adev, uint32_t si
return ret;
 }

+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) {
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   int ret;
+
+   if (!is_support_sw_smu(adev))
+   return -EOPNOTSUPP;
+
+   mutex_lock(&adev->pm.mutex);
+   ret = smu_send_rma_reason(smu);
+   mutex_unlock(&adev->pm.mutex);
+
+   return ret;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
  enum pp_clock_type type,
  uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 3047ffe7f244..621200e0823f 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -450,6 +450,7 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
 int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
+int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev);
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
   enum pp_clock_type type,
   uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 0ad947df777a..138dcb8724b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3669,3 +3669,13 @@ int smu_send_hbm_bad_channel_flag(struct smu_context 
*smu, uint32_t size)

return ret;
 }
+
+int smu_send_rma_reason(struct smu_context *smu) {
+   int ret = 0;
+
+   if (smu->ppt_funcs && smu->ppt_funcs->send_rma_reason)
+   ret = smu->ppt_funcs->send_rma_reason(smu);
+
+   return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 66e84defd0b6..a870bdd49a4e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1341,6 +1341,11 @@ struct pptable_funcs {
 */
int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size);

+   /**
+* @send_rma_reason: message rma reason event to SMU.
+*/
+   int (*send_rma_reason)(struct smu_context *smu);
+
/**
 * @get_ecc_table:  message SMU to get ECC INFO table.
 */
@@ -1588,5 +1593,6 @@ int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
 int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
+int smu_send_rma_reason(struct smu_context *smu);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 509e3cd483fb..86758051cb93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -91,7 +91,8 @@
 #define PPSMC_MSG_QueryValidMcaCeCount  0x3A
 #define PPSMC_MSG_McaBankCeDumpDW   0x3B
 #define PPSMC_MSG_SelectPLPDMode0x40
-#define PPSMC_Message_Count