RE: [PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6

2023-10-09 Thread Kamal, Asad
[AMD Official Use Only - General]

-Original Message-
From: Lazar, Lijo 
Sent: Monday, October 9, 2023 7:59 PM
To: Kamal, Asad ; amd-gfx@lists.freedesktop.org
Cc: Ma, Le ; Zhang, Morris ; Zhang, Hawking 

Subject: Re: [PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6



On 10/9/2023 5:28 PM, Lazar, Lijo wrote:
>
>
> On 10/6/2023 8:11 PM, Asad Kamal wrote:
>> Use gpu_metrics_v1_4 for SMUv13.0.6 to fill gpu metric info
>>
>> Signed-off-by: Asad Kamal 
>
> Series is:
>  Reviewed-by: Lijo Lazar 
>

On a second thought, since there is no FW release yet with FW metrics table v9 
support, suggest to drop patch 1 and pcie_bandwidth_inst  value assignment. 
Will keep the field as place holder till there is a FW update.
[Kamal, Asad] Will send a v3 with the changes.

Thanks & Regards
Asad

Thanks,
Lijo

> Thanks,
> Lijo
>
>> ---
>>   .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 67
>> ---
>>   1 file changed, 43 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> index ce971a93d28b..3a07f1c95e45 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
>> @@ -279,7 +279,7 @@ static int smu_v13_0_6_tables_init(struct
>> smu_context *smu)
>>   return -ENOMEM;
>>   smu_table->metrics_time = 0;
>> -smu_table->gpu_metrics_table_size = sizeof(struct
>> gpu_metrics_v1_3);
>> +smu_table->gpu_metrics_table_size = sizeof(struct
>> +gpu_metrics_v1_4);
>>   smu_table->gpu_metrics_table =
>>   kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
>>   if (!smu_table->gpu_metrics_table) { @@ -1969,22 +1969,19 @@
>> static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context
>> *smu)
>>   static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu,
>> void **table)
>>   {
>>   struct smu_table_context *smu_table = >smu_table;
>> -struct gpu_metrics_v1_3 *gpu_metrics =
>> -(struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
>> +struct gpu_metrics_v1_4 *gpu_metrics =
>> +(struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
>>   struct amdgpu_device *adev = smu->adev;
>> -int ret = 0, inst0, xcc0;
>> +int ret = 0, xcc_id, inst, i;
>>   MetricsTable_t *metrics;
>>   u16 link_width_level;
>> -inst0 = adev->sdma.instance[0].aid_id;
>> -xcc0 = GET_INST(GC, 0);
>> -
>>   metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
>>   ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
>>   if (ret)
>>   return ret;
>> -smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
>> +smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
>>   gpu_metrics->temperature_hotspot =
>>   SMUQ10_ROUND(metrics->MaxSocketTemperature);
>> @@ -2000,30 +1997,38 @@ static ssize_t
>> smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
>>   gpu_metrics->average_umc_activity =
>>   SMUQ10_ROUND(metrics->DramBandwidthUtilization);
>> -gpu_metrics->average_socket_power =
>> +gpu_metrics->curr_socket_power =
>>   SMUQ10_ROUND(metrics->SocketPower);
>>   /* Energy counter reported in 15.259uJ (2^-16) units */
>>   gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
>> -gpu_metrics->current_gfxclk =
>> -SMUQ10_ROUND(metrics->GfxclkFrequency[xcc0]);
>> -gpu_metrics->current_socclk =
>> -SMUQ10_ROUND(metrics->SocclkFrequency[inst0]);
>> -gpu_metrics->current_uclk =
>> SMUQ10_ROUND(metrics->UclkFrequency);
>> -gpu_metrics->current_vclk0 =
>> -SMUQ10_ROUND(metrics->VclkFrequency[inst0]);
>> -gpu_metrics->current_dclk0 =
>> -SMUQ10_ROUND(metrics->DclkFrequency[inst0]);
>> +for (i = 0; i < MAX_GFX_CLKS; i++) {
>> +xcc_id = GET_INST(GC, i);
>> +if (xcc_id >= 0)
>> +gpu_metrics->current_gfxclk[i] =
>> +SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
>> +
>> +if (i < MAX_CLKS) {
>> +gpu_metrics->current_socclk[i] =
>> +SMUQ10_ROUND(metrics->SocclkFrequency[i]);
>> +inst = GET_INST(VCN, i);
>> +if (inst >= 0) {
>> +gpu_metrics->current_vclk0[i] =
&

Re: [PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6

2023-10-09 Thread Lazar, Lijo




On 10/9/2023 5:28 PM, Lazar, Lijo wrote:



On 10/6/2023 8:11 PM, Asad Kamal wrote:

Use gpu_metrics_v1_4 for SMUv13.0.6 to fill
gpu metric info

Signed-off-by: Asad Kamal 


Series is:
 Reviewed-by: Lijo Lazar 



On a second thought, since there is no FW release yet with FW metrics 
table v9 support, suggest to drop patch 1 and pcie_bandwidth_inst  value 
assignment. Will keep the field as place holder till there is a FW update.


Thanks,
Lijo


Thanks,
Lijo


---
  .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 67 ---
  1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index ce971a93d28b..3a07f1c95e45 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -279,7 +279,7 @@ static int smu_v13_0_6_tables_init(struct 
smu_context *smu)

  return -ENOMEM;
  smu_table->metrics_time = 0;
-    smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
+    smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
  smu_table->gpu_metrics_table =
  kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
  if (!smu_table->gpu_metrics_table) {
@@ -1969,22 +1969,19 @@ static int 
smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
  static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, 
void **table)

  {
  struct smu_table_context *smu_table = >smu_table;
-    struct gpu_metrics_v1_3 *gpu_metrics =
-    (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
+    struct gpu_metrics_v1_4 *gpu_metrics =
+    (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
  struct amdgpu_device *adev = smu->adev;
-    int ret = 0, inst0, xcc0;
+    int ret = 0, xcc_id, inst, i;
  MetricsTable_t *metrics;
  u16 link_width_level;
-    inst0 = adev->sdma.instance[0].aid_id;
-    xcc0 = GET_INST(GC, 0);
-
  metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
  ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
  if (ret)
  return ret;
-    smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
+    smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
  gpu_metrics->temperature_hotspot =
  SMUQ10_ROUND(metrics->MaxSocketTemperature);
@@ -2000,30 +1997,38 @@ static ssize_t 
smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table

  gpu_metrics->average_umc_activity =
  SMUQ10_ROUND(metrics->DramBandwidthUtilization);
-    gpu_metrics->average_socket_power =
+    gpu_metrics->curr_socket_power =
  SMUQ10_ROUND(metrics->SocketPower);
  /* Energy counter reported in 15.259uJ (2^-16) units */
  gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
-    gpu_metrics->current_gfxclk =
-    SMUQ10_ROUND(metrics->GfxclkFrequency[xcc0]);
-    gpu_metrics->current_socclk =
-    SMUQ10_ROUND(metrics->SocclkFrequency[inst0]);
-    gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
-    gpu_metrics->current_vclk0 =
-    SMUQ10_ROUND(metrics->VclkFrequency[inst0]);
-    gpu_metrics->current_dclk0 =
-    SMUQ10_ROUND(metrics->DclkFrequency[inst0]);
+    for (i = 0; i < MAX_GFX_CLKS; i++) {
+    xcc_id = GET_INST(GC, i);
+    if (xcc_id >= 0)
+    gpu_metrics->current_gfxclk[i] =
+    SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+
+    if (i < MAX_CLKS) {
+    gpu_metrics->current_socclk[i] =
+    SMUQ10_ROUND(metrics->SocclkFrequency[i]);
+    inst = GET_INST(VCN, i);
+    if (inst >= 0) {
+    gpu_metrics->current_vclk0[i] =
+    SMUQ10_ROUND(metrics->VclkFrequency[inst]);
+    gpu_metrics->current_dclk0[i] =
+    SMUQ10_ROUND(metrics->DclkFrequency[inst]);
+    }
+    }
+    }
-    gpu_metrics->average_gfxclk_frequency = gpu_metrics->current_gfxclk;
-    gpu_metrics->average_socclk_frequency = gpu_metrics->current_socclk;
-    gpu_metrics->average_uclk_frequency = gpu_metrics->current_uclk;
-    gpu_metrics->average_vclk0_frequency = gpu_metrics->current_vclk0;
-    gpu_metrics->average_dclk0_frequency = gpu_metrics->current_dclk0;
+    gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
  /* Throttle status is not reported through metrics now */
  gpu_metrics->throttle_status = 0;
+    /* Clock Lock Status. Each bit corresponds to each GFXCLK 
instance */
+    gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> 
GET_INST(GC, 0);

+
  if (!(adev->flags & AMD_IS_APU)) {
  link_width_level = 
smu_v13_0_6_get_current_pcie_link_width_level(smu);

  if (link_width_level > MAX_LINK_WIDTH)
@@ -2033,6 +2038,10 @@ static ssize_t 
smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table

  

Re: [PATCH v2 3/3] drm/amd/pm: Use gpu_metrics_v1_4 for SMUv13.0.6

2023-10-09 Thread Lazar, Lijo




On 10/6/2023 8:11 PM, Asad Kamal wrote:

Use gpu_metrics_v1_4 for SMUv13.0.6 to fill
gpu metric info

Signed-off-by: Asad Kamal 


Series is:
Reviewed-by: Lijo Lazar 

Thanks,
Lijo


---
  .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 67 ---
  1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index ce971a93d28b..3a07f1c95e45 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -279,7 +279,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
return -ENOMEM;
smu_table->metrics_time = 0;
  
-	smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);

+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4);
smu_table->gpu_metrics_table =
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
if (!smu_table->gpu_metrics_table) {
@@ -1969,22 +1969,19 @@ static int 
smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu)
  static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void 
**table)
  {
struct smu_table_context *smu_table = >smu_table;
-   struct gpu_metrics_v1_3 *gpu_metrics =
-   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_4 *gpu_metrics =
+   (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table;
struct amdgpu_device *adev = smu->adev;
-   int ret = 0, inst0, xcc0;
+   int ret = 0, xcc_id, inst, i;
MetricsTable_t *metrics;
u16 link_width_level;
  
-	inst0 = adev->sdma.instance[0].aid_id;

-   xcc0 = GET_INST(GC, 0);
-
metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
ret = smu_v13_0_6_get_metrics_table(smu, metrics, true);
if (ret)
return ret;
  
-	smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);

+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4);
  
  	gpu_metrics->temperature_hotspot =

SMUQ10_ROUND(metrics->MaxSocketTemperature);
@@ -2000,30 +1997,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct 
smu_context *smu, void **table
gpu_metrics->average_umc_activity =
SMUQ10_ROUND(metrics->DramBandwidthUtilization);
  
-	gpu_metrics->average_socket_power =

+   gpu_metrics->curr_socket_power =
SMUQ10_ROUND(metrics->SocketPower);
/* Energy counter reported in 15.259uJ (2^-16) units */
gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
  
-	gpu_metrics->current_gfxclk =

-   SMUQ10_ROUND(metrics->GfxclkFrequency[xcc0]);
-   gpu_metrics->current_socclk =
-   SMUQ10_ROUND(metrics->SocclkFrequency[inst0]);
-   gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
-   gpu_metrics->current_vclk0 =
-   SMUQ10_ROUND(metrics->VclkFrequency[inst0]);
-   gpu_metrics->current_dclk0 =
-   SMUQ10_ROUND(metrics->DclkFrequency[inst0]);
+   for (i = 0; i < MAX_GFX_CLKS; i++) {
+   xcc_id = GET_INST(GC, i);
+   if (xcc_id >= 0)
+   gpu_metrics->current_gfxclk[i] =
+   SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]);
+
+   if (i < MAX_CLKS) {
+   gpu_metrics->current_socclk[i] =
+   SMUQ10_ROUND(metrics->SocclkFrequency[i]);
+   inst = GET_INST(VCN, i);
+   if (inst >= 0) {
+   gpu_metrics->current_vclk0[i] =
+   
SMUQ10_ROUND(metrics->VclkFrequency[inst]);
+   gpu_metrics->current_dclk0[i] =
+   
SMUQ10_ROUND(metrics->DclkFrequency[inst]);
+   }
+   }
+   }
  
-	gpu_metrics->average_gfxclk_frequency = gpu_metrics->current_gfxclk;

-   gpu_metrics->average_socclk_frequency = gpu_metrics->current_socclk;
-   gpu_metrics->average_uclk_frequency = gpu_metrics->current_uclk;
-   gpu_metrics->average_vclk0_frequency = gpu_metrics->current_vclk0;
-   gpu_metrics->average_dclk0_frequency = gpu_metrics->current_dclk0;
+   gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency);
  
  	/* Throttle status is not reported through metrics now */

gpu_metrics->throttle_status = 0;
  
+	/* Clock Lock Status. Each bit corresponds to each GFXCLK instance */

+   gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> 
GET_INST(GC, 0);
+
if (!(adev->flags & AMD_IS_APU)) {
link_width_level = 
smu_v13_0_6_get_current_pcie_link_width_level(smu);
if (link_width_level > MAX_LINK_WIDTH)
@@ -2033,6 +2038,10 @@ static ssize_t