Re: [PATCH v3 2/2] drm/amd/pm: vangogh: send the SMT enable message to pmfw

2023-03-28 Thread Lazar, Lijo




On 3/29/2023 7:21 AM, Wenyou Yang wrote:

When the CPU SMT status is changed on the fly, send the SMT enable
message to pmfw to notify it that the SMT status has changed.

Add the support to send PPSMC_MSG_SetCClkSMTEnable(0x58) message
to pmfw for vangogh.

Signed-off-by: Wenyou Yang 
---
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  5 +++
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++
  .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 +-
  drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 43 +++
  5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b5d64749990e..d53d2acc9b46 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -69,6 +69,8 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t 
speed);
  static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
  static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
  
+extern struct raw_notifier_head smt_notifier_head;

+
  static int smu_sys_get_pp_feature_mask(void *handle,
   char *buf)
  {
@@ -1122,6 +1124,9 @@ static int smu_sw_fini(void *handle)
  
  	smu_fini_microcode(smu);
  
+	if (smu->nb.notifier_call != NULL)

+   raw_notifier_chain_unregister(&smt_notifier_head, &smu->nb);
+
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h

index 09469c750a96..4d51ac5ec8ba 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -566,6 +566,8 @@ struct smu_context
  
  	struct firmware pptable_firmware;
  
+	struct notifier_block nb;

+
u32 param_reg;
u32 msg_reg;
u32 resp_reg;
@@ -1354,6 +1356,11 @@ struct pptable_funcs {
 * @init_pptable_microcode: Prepare the pptable microcode to upload via 
PSP
 */
int (*init_pptable_microcode)(struct smu_context *smu);
+
+   /**
+* @set_cpu_smt_enable: Set the CPU SMT status.
+*/
+   int (*set_cpu_smt_enable)(struct smu_context *smu, bool smt_enable);
  };
  
  typedef enum {

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
index 7471e2df2828..a6bfa1912c42 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
@@ -111,7 +111,8 @@
  #define PPSMC_MSG_GetGfxOffStatus0x50
  #define PPSMC_MSG_GetGfxOffEntryCount0x51
  #define PPSMC_MSG_LogGfxOffResidency 0x52
-#define PPSMC_Message_Count0x53
+#define PPSMC_MSG_SetCClkSMTEnable0x58
+#define PPSMC_Message_Count0x59
  
  //Argument for PPSMC_MSG_GfxDeviceDriverReset

  enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..820812d910bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(SetCClkSMTEnable),
  
  #undef __SMU_DUMMY_MAP

  #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 7433dcaa16e0..07f8822f2eb0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -35,6 +35,7 @@
  #include "asic_reg/gc/gc_10_3_0_offset.h"
  #include "asic_reg/gc/gc_10_3_0_sh_mask.h"
  #include 
+#include 
  
  /*

   * DO NOT use these for err/warn/info/debug messages.
@@ -70,6 +71,8 @@
FEATURE_MASK(FEATURE_DCFCLK_DPM_BIT)| \
FEATURE_MASK(FEATURE_GFX_DPM_BIT))
  
+extern struct raw_notifier_head smt_notifier_head;

+
  static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,  
0),
MSG_MAP(GetSmuVersion,  PPSMC_MSG_GetSmuVersion,
0),
@@ -141,6 +144,7 @@ static struct cmn2asic_msg_mapping 
vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(GetGfxOffStatus,PPSMC_MSG_GetGfxOffStatus,  
0),
MSG_MAP(GetGfxOffEntryCount,
PPSMC_MSG_GetGfxOffEntryCount,  0),
MSG_MAP(LogGfxOffResidency, 
PPSMC_MSG_LogGfxOff

RE: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some asics

2023-03-28 Thread Liu01, Tong (Esther)
[AMD Official Use Only - General]

Hi @Quan, Evan,

I checked pp_dpm_vclk and the newly added pp_dpm_vclk1; the values are not 
exactly the same, please help check it:

root@ubuntu2204-RPP:~# cat /sys/class/drm/renderD128/device/pp_dpm_vclk1
0: 513Mhz
1: 188Mhz *
2: 2934Mhz
root@ubuntu2204-RPP:~# cat /sys/class/drm/renderD128/device/pp_dpm_vclk
0: 513Mhz
1: 25Mhz *
2: 2934Mhz

Kind regards,
Esther

-Original Message-
From: Quan, Evan  
Sent: 2023年3月29日星期三 上午10:12
To: Liu01, Tong (Esther) ; amd-gfx@lists.freedesktop.org
Cc: Chen, Horace ; Tuikov, Luben ; 
Koenig, Christian ; Deucher, Alexander 
; Xiao, Jack ; Zhang, Hawking 
; Liu, Monk ; Xu, Feifei 
; Wang, Yang(Kevin) ; Liu01, Tong 
(Esther) 
Subject: RE: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some asics

[AMD Official Use Only - General]

IIRC, the VCLK1 always has the same frequency as VCLK0 with our current 
implementation.
So, is it necessary to provide another sysfs node for checking the vclk1 frequency?

BR
Evan
> -Original Message-
> From: Tong Liu01 
> Sent: Tuesday, March 28, 2023 7:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Quan, Evan ; Chen, Horace 
> ; Tuikov, Luben ; Koenig, 
> Christian ; Deucher, Alexander 
> ; Xiao, Jack ; Zhang, 
> Hawking ; Liu, Monk ; Xu, 
> Feifei ; Wang, Yang(Kevin) 
> ; Liu01, Tong (Esther) 
> Subject: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some 
> asics
> 
> Add sysfs node pp_dpm_vclk1 for gc11.0.3
> 
> Signed-off-by: Tong Liu01 
> ---
>  .../gpu/drm/amd/include/kgd_pp_interface.h|  1 +
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c| 22
> +++
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 86b6b0c9fb02..fe75497eeeab 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -104,6 +104,7 @@ enum pp_clock_type {
>   PP_FCLK,
>   PP_DCEFCLK,
>   PP_VCLK,
> + PP_VCLK1,
>   PP_DCLK,
>   OD_SCLK,
>   OD_MCLK,
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index d75a67cfe523..1da6e9469450 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -1180,6 +1180,21 @@ static ssize_t amdgpu_set_pp_dpm_vclk(struct 
> device *dev,
>   return amdgpu_set_pp_dpm_clock(dev, PP_VCLK, buf, count);  }
> 
> +static ssize_t amdgpu_get_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + return amdgpu_get_pp_dpm_clock(dev, PP_VCLK1, buf); }
> +
> +static ssize_t amdgpu_set_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf,
> + size_t count)
> +{
> + return amdgpu_set_pp_dpm_clock(dev, PP_VCLK1, buf, count); }
> +
>  static ssize_t amdgpu_get_pp_dpm_dclk(struct device *dev,
>   struct device_attribute *attr,
>   char *buf)
> @@ -2002,6 +2017,7 @@ static struct amdgpu_device_attr 
> amdgpu_device_attrs[] = {
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> + AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk1,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> @@ -2091,6 +2107,12 @@ static int default_attr_update(struct 
> amdgpu_device *adev, struct amdgpu_device_
> gc_ver == IP_VERSION(11, 0, 2) ||
> gc_ver == IP_VERSION(11, 0, 3)))
>   *states = ATTR_STATE_UNSUPPORTED;
> + } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
> + if (!((gc_ver == IP_VERSION(10, 3, 1) ||
> +gc_ver == IP_VERSION(10, 3, 0) ||
> +gc_ver == IP_VERSION(11, 0, 2) ||
> +gc_ver == IP_VERSION(11, 0, 3)) && adev-
> >vcn.num_vcn_inst >= 2))
> + *states = ATTR_STATE_UNSUPPORTED;
>   } else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
>   if (!(gc_ver == IP_VERSION(10, 3, 1) ||
> gc_ver == IP_VERSION(10, 3, 0) || diff --git 
> a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index b5d64749990e..bffbef3f666d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2006,6 +2006,8 @@ static int smu_force_ppclk_levels(void *handle,
>   clk_type = SMU_DCEFCLK; brea

RE: [PATCH] drm/amdgpu: fix AMDGPU_RAS_BLOCK__DF check

2023-03-28 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Dan Carpenter  
Sent: Wednesday, March 29, 2023 13:28
To: Zhang, Hawking 
Cc: Koenig, Christian ; Pan, Xinhui 
; David Airlie ; Daniel Vetter 
; Zhang, Hawking ; Zhou1, Tao 
; Yang, Stanley ; Chai, Thomas 
; Zhao, Victor ; Li, Candice 
; amd-gfx@lists.freedesktop.org; 
kernel-janit...@vger.kernel.org
Subject: [PATCH] drm/amdgpu: fix AMDGPU_RAS_BLOCK__DF check

There is a mixup where AMDGPU_RAS_BLOCK__DF is used as a mask instead of a 
shifter.  It means that this condition will be true for AMDGPU_RAS_BLOCK__MMHUB 
instead of for AMDGPU_RAS_BLOCK__DF.

Fixes: b6f512168478 ("drm/amdgpu: Add fatal error handling in nbio v4_3")
Signed-off-by: Dan Carpenter 
---
>From static analysis.  Not tested at all.

 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fac45f98145d..4069bce9479f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2564,7 +2564,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
adev->nbio.ras = &nbio_v7_4_ras;
break;
case IP_VERSION(4, 3, 0):
-   if (adev->ras_hw_enabled & AMDGPU_RAS_BLOCK__DF)
+   if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
/* unlike other generation of nbio ras,
 * nbio v4_3 only support fatal error interrupt
 * to inform software that DF is freezed due to
--
2.39.1


Re: [PATCH v3 2/2] drm/amd/pm: vangogh: send the SMT enable message to pmfw

2023-03-28 Thread Mario Limonciello



On 3/28/23 20:51, Wenyou Yang wrote:

When the CPU SMT status is changed on the fly, send the SMT enable
message to pmfw to notify it that the SMT status has changed.

Add the support to send PPSMC_MSG_SetCClkSMTEnable(0x58) message
to pmfw for vangogh.

Signed-off-by: Wenyou Yang 
---
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  5 +++
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++
  .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 +-
  drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 43 +++
  5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b5d64749990e..d53d2acc9b46 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -69,6 +69,8 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t 
speed);
  static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
  static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
  
+extern struct raw_notifier_head smt_notifier_head;

+
  static int smu_sys_get_pp_feature_mask(void *handle,
   char *buf)
  {
@@ -1122,6 +1124,9 @@ static int smu_sw_fini(void *handle)
  
  	smu_fini_microcode(smu);
  
+	if (smu->nb.notifier_call != NULL)

+   raw_notifier_chain_unregister(&smt_notifier_head, &smu->nb);
+
return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h

index 09469c750a96..4d51ac5ec8ba 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -566,6 +566,8 @@ struct smu_context
  
  	struct firmware pptable_firmware;
  
+	struct notifier_block nb;

+
u32 param_reg;
u32 msg_reg;
u32 resp_reg;
@@ -1354,6 +1356,11 @@ struct pptable_funcs {
 * @init_pptable_microcode: Prepare the pptable microcode to upload via 
PSP
 */
int (*init_pptable_microcode)(struct smu_context *smu);
+
+   /**
+* @set_cpu_smt_enable: Set the CPU SMT status.
+*/
+   int (*set_cpu_smt_enable)(struct smu_context *smu, bool smt_enable);
  };
  
  typedef enum {

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
index 7471e2df2828..a6bfa1912c42 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
@@ -111,7 +111,8 @@
  #define PPSMC_MSG_GetGfxOffStatus0x50
  #define PPSMC_MSG_GetGfxOffEntryCount0x51
  #define PPSMC_MSG_LogGfxOffResidency 0x52
-#define PPSMC_Message_Count0x53
+#define PPSMC_MSG_SetCClkSMTEnable0x58
+#define PPSMC_Message_Count0x59
  
  //Argument for PPSMC_MSG_GfxDeviceDriverReset

  enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..820812d910bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(SetCClkSMTEnable),
  
  #undef __SMU_DUMMY_MAP

  #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 7433dcaa16e0..07f8822f2eb0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -35,6 +35,7 @@
  #include "asic_reg/gc/gc_10_3_0_offset.h"
  #include "asic_reg/gc/gc_10_3_0_sh_mask.h"
  #include 
+#include 
  
  /*

   * DO NOT use these for err/warn/info/debug messages.
@@ -70,6 +71,8 @@
FEATURE_MASK(FEATURE_DCFCLK_DPM_BIT)| \
FEATURE_MASK(FEATURE_GFX_DPM_BIT))
  
+extern struct raw_notifier_head smt_notifier_head;

+
  static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,  
0),
MSG_MAP(GetSmuVersion,  PPSMC_MSG_GetSmuVersion,
0),
@@ -141,6 +144,7 @@ static struct cmn2asic_msg_mapping 
vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(GetGfxOffStatus,PPSMC_MSG_GetGfxOffStatus,  
0),
MSG_MAP(GetGfxOffEntryCount,
PPSMC_MSG_GetGfxOffEntryCount,  0),
MSG_MAP(LogGfxOffResidency, 
PPSMC_MSG_LogGfxOffResid

RE: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some asics

2023-03-28 Thread Quan, Evan
[AMD Official Use Only - General]

IIRC, the VCLK1 always has the same frequency as VCLK0 with our current 
implementation.
So, is it necessary to provide another sysfs node for checking the vclk1 frequency?

BR
Evan
> -Original Message-
> From: Tong Liu01 
> Sent: Tuesday, March 28, 2023 7:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Quan, Evan ; Chen, Horace
> ; Tuikov, Luben ;
> Koenig, Christian ; Deucher, Alexander
> ; Xiao, Jack ; Zhang,
> Hawking ; Liu, Monk ; Xu,
> Feifei ; Wang, Yang(Kevin)
> ; Liu01, Tong (Esther) 
> Subject: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some
> asics
> 
> Add sysfs node pp_dpm_vclk1 for gc11.0.3
> 
> Signed-off-by: Tong Liu01 
> ---
>  .../gpu/drm/amd/include/kgd_pp_interface.h|  1 +
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c| 22
> +++
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 86b6b0c9fb02..fe75497eeeab 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -104,6 +104,7 @@ enum pp_clock_type {
>   PP_FCLK,
>   PP_DCEFCLK,
>   PP_VCLK,
> + PP_VCLK1,
>   PP_DCLK,
>   OD_SCLK,
>   OD_MCLK,
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index d75a67cfe523..1da6e9469450 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -1180,6 +1180,21 @@ static ssize_t amdgpu_set_pp_dpm_vclk(struct
> device *dev,
>   return amdgpu_set_pp_dpm_clock(dev, PP_VCLK, buf, count);
>  }
> 
> +static ssize_t amdgpu_get_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + return amdgpu_get_pp_dpm_clock(dev, PP_VCLK1, buf);
> +}
> +
> +static ssize_t amdgpu_set_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf,
> + size_t count)
> +{
> + return amdgpu_set_pp_dpm_clock(dev, PP_VCLK1, buf, count);
> +}
> +
>  static ssize_t amdgpu_get_pp_dpm_dclk(struct device *dev,
>   struct device_attribute *attr,
>   char *buf)
> @@ -2002,6 +2017,7 @@ static struct amdgpu_device_attr
> amdgpu_device_attrs[] = {
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> + AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk1,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie,
>   ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> @@ -2091,6 +2107,12 @@ static int default_attr_update(struct
> amdgpu_device *adev, struct amdgpu_device_
> gc_ver == IP_VERSION(11, 0, 2) ||
> gc_ver == IP_VERSION(11, 0, 3)))
>   *states = ATTR_STATE_UNSUPPORTED;
> + } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
> + if (!((gc_ver == IP_VERSION(10, 3, 1) ||
> +gc_ver == IP_VERSION(10, 3, 0) ||
> +gc_ver == IP_VERSION(11, 0, 2) ||
> +gc_ver == IP_VERSION(11, 0, 3)) && adev-
> >vcn.num_vcn_inst >= 2))
> + *states = ATTR_STATE_UNSUPPORTED;
>   } else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
>   if (!(gc_ver == IP_VERSION(10, 3, 1) ||
> gc_ver == IP_VERSION(10, 3, 0) ||
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index b5d64749990e..bffbef3f666d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2006,6 +2006,8 @@ static int smu_force_ppclk_levels(void *handle,
>   clk_type = SMU_DCEFCLK; break;
>   case PP_VCLK:
>   clk_type = SMU_VCLK; break;
> + case PP_VCLK1:
> + clk_type = SMU_VCLK1; break;
>   case PP_DCLK:
>   clk_type = SMU_DCLK; break;
>   case OD_SCLK:
> @@ -2393,6 +2395,8 @@ static enum smu_clk_type
> smu_convert_to_smuclk(enum pp_clock_type type)
>   clk_type = SMU_DCEFCLK; break;
>   case PP_VCLK:
>   clk_type = SMU_VCLK; break;
> + case PP_VCLK1:
> + clk_type = SMU_VCLK1; break;
>   case PP_DCLK:
>   clk_type = SMU_DCLK; break;
>   case OD_SCLK:
> --
> 2.34.1


[PATCH v3 2/2] drm/amd/pm: vangogh: send the SMT enable message to pmfw

2023-03-28 Thread Wenyou Yang
When the CPU SMT status is changed on the fly, send the SMT enable
message to pmfw to notify it that the SMT status has changed.

Add the support to send PPSMC_MSG_SetCClkSMTEnable(0x58) message
to pmfw for vangogh.

Signed-off-by: Wenyou Yang 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  5 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++
 .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 43 +++
 5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b5d64749990e..d53d2acc9b46 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -69,6 +69,8 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t 
speed);
 static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
 static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
 
+extern struct raw_notifier_head smt_notifier_head;
+
 static int smu_sys_get_pp_feature_mask(void *handle,
   char *buf)
 {
@@ -1122,6 +1124,9 @@ static int smu_sw_fini(void *handle)
 
smu_fini_microcode(smu);
 
+   if (smu->nb.notifier_call != NULL)
+   raw_notifier_chain_unregister(&smt_notifier_head, &smu->nb);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 09469c750a96..4d51ac5ec8ba 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -566,6 +566,8 @@ struct smu_context
 
struct firmware pptable_firmware;
 
+   struct notifier_block nb;
+
u32 param_reg;
u32 msg_reg;
u32 resp_reg;
@@ -1354,6 +1356,11 @@ struct pptable_funcs {
 * @init_pptable_microcode: Prepare the pptable microcode to upload via 
PSP
 */
int (*init_pptable_microcode)(struct smu_context *smu);
+
+   /**
+* @set_cpu_smt_enable: Set the CPU SMT status.
+*/
+   int (*set_cpu_smt_enable)(struct smu_context *smu, bool smt_enable);
 };
 
 typedef enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
index 7471e2df2828..a6bfa1912c42 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
@@ -111,7 +111,8 @@
 #define PPSMC_MSG_GetGfxOffStatus 0x50
 #define PPSMC_MSG_GetGfxOffEntryCount 0x51
 #define PPSMC_MSG_LogGfxOffResidency  0x52
-#define PPSMC_Message_Count0x53
+#define PPSMC_MSG_SetCClkSMTEnable0x58
+#define PPSMC_Message_Count0x59
 
 //Argument for PPSMC_MSG_GfxDeviceDriverReset
 enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..820812d910bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(SetCClkSMTEnable),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 7433dcaa16e0..07f8822f2eb0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -35,6 +35,7 @@
 #include "asic_reg/gc/gc_10_3_0_offset.h"
 #include "asic_reg/gc/gc_10_3_0_sh_mask.h"
 #include 
+#include 
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
@@ -70,6 +71,8 @@
FEATURE_MASK(FEATURE_DCFCLK_DPM_BIT)| \
FEATURE_MASK(FEATURE_GFX_DPM_BIT))
 
+extern struct raw_notifier_head smt_notifier_head;
+
 static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage,PPSMC_MSG_TestMessage,  
0),
MSG_MAP(GetSmuVersion,  PPSMC_MSG_GetSmuVersion,
0),
@@ -141,6 +144,7 @@ static struct cmn2asic_msg_mapping 
vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(GetGfxOffStatus,PPSMC_MSG_GetGfxOffStatus,  
0),
MSG_MAP(GetGfxOffEntryCount,
PPSMC_MSG_GetGfxOffEntryCount,  0),
MSG_MAP(LogGfxOffResidency, 
PPSMC_MSG_LogGfxOffResidency,   0),
+   MSG_

[PATCH v3 1/2] cpu/smt: add a notifier to notify the SMT changes

2023-03-28 Thread Wenyou Yang
Add a notifier chain to provide notification of CPU SMT status changes

Signed-off-by: Wenyou Yang 
---
 include/linux/cpu.h |  5 +
 kernel/cpu.c| 10 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 314802f98b9d..9a842317fe2d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -213,6 +213,11 @@ enum cpuhp_smt_control {
CPU_SMT_NOT_IMPLEMENTED,
 };
 
+enum cpuhp_smt_status {
+   SMT_ENABLED,
+   SMT_DISABLED,
+};
+
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6c0a92ca6bb5..1af66a3ffd99 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -89,6 +89,9 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
 cpumask_t cpus_booted_once_mask;
 #endif
 
+RAW_NOTIFIER_HEAD(smt_notifier_head);
+EXPORT_SYMBOL(smt_notifier_head);
+
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
 static struct lockdep_map cpuhp_state_up_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
@@ -2281,8 +2284,10 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 */
cpuhp_offline_cpu_device(cpu);
}
-   if (!ret)
+   if (!ret) {
cpu_smt_control = ctrlval;
+   raw_notifier_call_chain(&smt_notifier_head, SMT_DISABLED, NULL);
+   }
cpu_maps_update_done();
return ret;
 }
@@ -2303,6 +2308,9 @@ int cpuhp_smt_enable(void)
/* See comment in cpuhp_smt_disable() */
cpuhp_online_cpu_device(cpu);
}
+   if (!ret)
+   raw_notifier_call_chain(&smt_notifier_head, SMT_ENABLED, NULL);
+
cpu_maps_update_done();
return ret;
 }
-- 
2.39.2



[PATCH v3 0/2] send message to pmfw when SMT changes

2023-03-28 Thread Wenyou Yang
When the CPU SMT status changes on the fly, send the message to pmfw
to notify it that the SMT status has changed.

Changes in v3
1./ Because it is only required for Vangogh, move registering notifier
to vangogh_ppt.c, then remove the patch 2, and the number of patches
decreased to 2.

Changes in v2:
1/. Embed the smt notifier callback into "struct smu_context" structure.
2/. Correct the PPSMC_Message_Count value.
3/. Improve several code styles and others.

Wenyou Yang (2):
  cpu/smt: add a notifier to notify the SMT changes
  drm/amd/pm: vangogh: send the SMT enable message to pmfw

 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  5 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++
 .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 43 +++
 include/linux/cpu.h   |  5 +++
 kernel/cpu.c  | 10 -
 7 files changed, 73 insertions(+), 3 deletions(-)

-- 
2.39.2



[PATCH 2/2] drm/amd/display: Add previous prototype to 'optc3_wait_drr_doublebuffer_pending_clear'

2023-03-28 Thread Caio Novais
Compiling AMD GPU drivers displays a warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_optc.c:294:6: warning: no 
previous prototype for ‘optc3_wait_drr_doublebuffer_pending_clear’ 
[-Wmissing-prototypes]

Get rid of it by adding a function prototype

'optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)' on 
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h

Signed-off-by: Caio Novais 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
index fb06dc9a4893..2e3ba6e2f336 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
@@ -331,6 +331,8 @@ void optc3_lock_doublebuffer_enable(struct timing_generator 
*optc);
 
 void optc3_lock_doublebuffer_disable(struct timing_generator *optc);
 
+void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc);
+
 void optc3_set_drr_trigger_window(struct timing_generator *optc,
uint32_t window_start, uint32_t window_end);
 
-- 
2.40.0



[PATCH 1/2] drm/amd/display: Remove unused variable 'scl_enable'

2023-03-28 Thread Caio Novais
Compiling AMD GPU drivers displays a warning:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn314/display_rq_dlg_calc_314.c: 
In function ‘dml_rq_dlg_get_dlg_params’:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn314/display_rq_dlg_calc_314.c:991:14:
 warning: variable ‘scl_enable’ set but not used [-Wunused-but-set-variable]

Get rid of it by removing the variable 'scl_enable'.

Signed-off-by: Caio Novais 
---
 .../gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c | 2 --
 1 file changed, 2 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
index d1c2693a2e28..ea4eb66066c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -988,7 +988,6 @@ static void dml_rq_dlg_get_dlg_params(
double hratio_c;
double vratio_l;
double vratio_c;
-   bool scl_enable;
 
unsigned int swath_width_ub_l;
unsigned int dpte_groups_per_row_ub_l;
@@ -1117,7 +1116,6 @@ static void dml_rq_dlg_get_dlg_params(
hratio_c = scl->hscl_ratio_c;
vratio_l = scl->vscl_ratio;
vratio_c = scl->vscl_ratio_c;
-   scl_enable = scl->scl_enable;
 
swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
-- 
2.40.0



[PATCH 0/2] drm/amd/display: Remove an unused variable and add a function prototype

2023-03-28 Thread Caio Novais
This patchset removes one unused variable and adds a function prototype.

Caio Novais (2):
  drm/amd/display: Remove unused variable 'scl_enable'
  drm/amd/display: Add previous prototype to
'optc3_wait_drr_doublebuffer_pending_clear'

 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h   | 2 ++
 .../gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

-- 
2.40.0



[linux-next:master] BUILD REGRESSION a6faf7ea9fcb7267d06116d4188947f26e00e57e

2023-03-28 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: a6faf7ea9fcb7267d06116d4188947f26e00e57e  Add linux-next specific 
files for 20230328

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202303161521.jbgbafjj-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303281539.zzi4vpw1-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_validation.c:351:13: 
warning: variable 'bw_needed' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_validation.c:352:25: 
warning: variable 'link' set but not used [-Wunused-but-set-variable]
drivers/net/wireless/legacy/ray_cs.c:628:17: warning: 'strncpy' specified bound 
32 equals destination size [-Wstringop-truncation]
drivers/perf/arm_pmuv3.c:44:2: error: use of undeclared identifier 
'PERF_MAP_ALL_UNSUPPORTED'
drivers/perf/arm_pmuv3.c:59:2: error: use of undeclared identifier 
'PERF_CACHE_MAP_ALL_UNSUPPORTED'
drivers/perf/arm_pmuv3.c:61:13: error: use of undeclared identifier 'OP_READ'
drivers/perf/arm_pmuv3.c:61:25: error: use of undeclared identifier 
'RESULT_ACCESS'
drivers/perf/arm_pmuv3.c:61:3: error: call to undeclared function 'C'; ISO C99 
and later do not support implicit function declarations 
[-Wimplicit-function-declaration]
drivers/perf/arm_pmuv3.c:61:5: error: use of undeclared identifier 'L1D'
drivers/perf/arm_pmuv3.c:62:25: error: use of undeclared identifier 
'RESULT_MISS'
drivers/perf/arm_pmuv3.c:64:5: error: use of undeclared identifier 'L1I'
drivers/perf/arm_pmuv3.c:67:5: error: use of undeclared identifier 'DTLB'

Unverified Error/Warning (likely false positive, please contact us if 
interested):

arch/parisc/kernel/firmware.c:1271 pdc_soft_power_button_panic() error: 
uninitialized symbol 'flags'.
include/linux/gpio/consumer.h: linux/err.h is included more than once.
include/linux/gpio/driver.h: asm/bug.h is included more than once.
io_uring/io_uring.c:432 io_prep_async_work() error: we previously assumed 
'req->file' could be null (see line 425)
io_uring/kbuf.c:221 __io_remove_buffers() warn: variable dereferenced before 
check 'bl->buf_ring' (see line 219)

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|   `-- 
drivers-net-wireless-legacy-ray_cs.c:warning:strncpy-specified-bound-equals-destination-size
|-- alpha-randconfig-r012-20230327
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- arc-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- arm-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- arm-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- arm64-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- i386-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- ia64-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|   `-- 
drivers-net-wireless-legacy-ray_cs.c:warning:strncpy-specified-bound-equals-destination-size
|-- loongarch-allmodconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- loongarch-buildonly-randconfig-r001-20230326
|   |-- 
driver

Re: [PATCH] drm/amdgpu: simplify amdgpu_ras_eeprom.c

2023-03-28 Thread Luben Tuikov
On 2023-03-27 20:11, Alex Deucher wrote:
> All chips that support RAS also support IP discovery, so
> use the IP versions rather than a mix of IP versions and
> asic types.
> 
> Signed-off-by: Alex Deucher 
> Cc: Luben Tuikov 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 72 ++-
>  1 file changed, 20 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> index 3106fa8a15ef..c2ef2b1456bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> @@ -106,48 +106,13 @@
>  #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, 
> eeprom_control))->adev
>  
>  static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
> -{
> - if (adev->asic_type == CHIP_IP_DISCOVERY) {
> - switch (adev->ip_versions[MP1_HWIP][0]) {
> - case IP_VERSION(13, 0, 0):
> - case IP_VERSION(13, 0, 10):
> - return true;
> - default:
> - return false;
> - }
> - }
> -
> - return  adev->asic_type == CHIP_VEGA20 ||
> - adev->asic_type == CHIP_ARCTURUS ||
> - adev->asic_type == CHIP_SIENNA_CICHLID ||
> - adev->asic_type == CHIP_ALDEBARAN;
> -}
> -
> -static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
> -struct amdgpu_ras_eeprom_control 
> *control)
> -{
> - struct atom_context *atom_ctx = adev->mode_info.atom_context;
> -
> - if (!control || !atom_ctx)
> - return false;
> -
> - if (strnstr(atom_ctx->vbios_version,
> - "D342",
> - sizeof(atom_ctx->vbios_version)))
> - control->i2c_address = EEPROM_I2C_MADDR_0;
> - else
> - control->i2c_address = EEPROM_I2C_MADDR_4;
> -
> - return true;
> -}
> -
> -static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev,
> -struct amdgpu_ras_eeprom_control 
> *control)
>  {
>   switch (adev->ip_versions[MP1_HWIP][0]) {
> + case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
> + case IP_VERSION(11, 0, 7):
>   case IP_VERSION(13, 0, 0):
> + case IP_VERSION(13, 0, 2):
>   case IP_VERSION(13, 0, 10):

I'd add the rest of the proper names here which are being deleted by this 
change,
so as to not lose this information by this commit: Sienna Cichlid and Aldebaran,
the rest can be left blank as per the current state of the code.

> - control->i2c_address = EEPROM_I2C_MADDR_4;
>   return true;
>   default:
>   return false;
> @@ -178,29 +143,32 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device 
> *adev,
>   return true;
>   }
>  
> - switch (adev->asic_type) {
> - case CHIP_VEGA20:
> - control->i2c_address = EEPROM_I2C_MADDR_0;
> + switch (adev->ip_versions[MP1_HWIP][0]) {
> + case IP_VERSION(11, 0, 2):
> + /* VEGA20 and ARCTURUS */
> + if (adev->asic_type == CHIP_VEGA20)
> + control->i2c_address = EEPROM_I2C_MADDR_0;
> + else if (strnstr(atom_ctx->vbios_version,

In the code this is qualified with atom_ctx != NULL; and if it is,
then we return false. So, this is fine, iff we can guarantee that
"atom_ctx" will never be NULL. If, OTOH, we cannot guarantee that,
then we need to add,
else if (!atom_ctx)
return false;
else if (strnstr(...

Although, I do recognize that for Aldebaran below, we do not qualify
atom_ctx, so we should probably qualify there too.

> +  "D342",
> +  sizeof(atom_ctx->vbios_version)))
> + control->i2c_address = EEPROM_I2C_MADDR_0;
> + else
> + control->i2c_address = EEPROM_I2C_MADDR_4;
>   return true;
> -
> - case CHIP_ARCTURUS:
> - return __get_eeprom_i2c_addr_arct(adev, control);
> -
> - case CHIP_SIENNA_CICHLID:
> + case IP_VERSION(11, 0, 7):
>   control->i2c_address = EEPROM_I2C_MADDR_0;
>   return true;
> -
> - case CHIP_ALDEBARAN:
> + case IP_VERSION(13, 0, 2):
>   if (strnstr(atom_ctx->vbios_version, "D673",
>   sizeof(atom_ctx->vbios_version)))
>   control->i2c_address = EEPROM_I2C_MADDR_4;
>   else
>   control->i2c_address = EEPROM_I2C_MADDR_0;
>   return true;
> -
> - case CHIP_IP_DISCOVERY:
> - return __get_eeprom_i2c_addr_ip_discovery(adev, control);
> -
> + case IP_VERSION(13, 0, 0):
> + case IP_VERSION(13, 0, 10):
> + control->i2c_address = EEPROM_I2C_MADDR_4;
> + return true;
>   default:
>   re

RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for unmap queues

2023-03-28 Thread Kim, Jonathan
[Public]

Thanks for the catch, Kent.
I'll fix up the typos with a follow-on.

Jon

> -Original Message-
> From: Russell, Kent 
> Sent: Tuesday, March 28, 2023 11:19 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org;
> dri-de...@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> 
> Subject: RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for
> unmap queues
>
> [AMD Official Use Only - General]
>
> 3 tiny grammar/spelling things inline (not critical)
>
>  Kent
>
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Jonathan Kim
> > Sent: Monday, March 27, 2023 2:43 PM
> > To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> > Cc: Kuehling, Felix ; Kim, Jonathan
> > 
> > Subject: [PATCH 12/34] drm/amdgpu: add configurable grace period for
> unmap
> > queues
> >
> > The HWS schedule allows a grace period for wave completion prior to
> > preemption for better performance by avoiding CWSR on waves that can
> > potentially complete quickly. The debugger, on the other hand, will
> > want to inspect wave status immediately after it actively triggers
> > preemption (a suspend function to be provided).
> >
> > To minimize latency between preemption and debugger wave inspection,
> allow
> > immediate preemption by setting the grace period to 0.
> >
> > Note that setting the preemption grace period to 0 will result in an
> > infinite grace period being set due to a CP FW bug so set it to 1 for now.
> >
> > v2: clarify purpose in the description of this patch
> >
> > Signed-off-by: Jonathan Kim 
> > Reviewed-by: Felix Kuehling 
> > ---
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 43 
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  6 ++
> >  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 
> >  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  9 ++-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 62 +-
> >  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +
> >  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +
> >  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 39 +++
> >  .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   | 65
> +++
> >  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 ++
> >  13 files changed, 291 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > index a6f98141c29c..b811a0985050 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> > @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> > .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
> > .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
> > +   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > +   .build_grace_period_packet_info =
> > kgd_gfx_v9_build_grace_period_packet_info,
> > .program_trap_handler_settings =
> > kgd_gfx_v9_program_trap_handler_settings,
> >  };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > index d2918e5c0dea..a62bd0068515 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> > @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> >
> > kgd_gfx_v9_set_vm_context_page_table_base,
> > .enable_debug_trap = kgd_arcturus_enable_debug_trap,
> > .disable_debug_trap = kgd_arcturus_disable_debug_trap,
> > +   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> > +   .build_grace_period_packet_info =
> > kgd_gfx_v9_build_grace_period_packet_info,
> > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> > .program_trap_handler_settings =
> > kgd_gfx_v9_program_trap_handler_settings
> >  };
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > index 969015281510..605387e55d33 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> > @@ -802,6 +802,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct
> > amdgpu_device *adev,
> > return 0;
> >  }
> >
> > +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2
> > values
> > + * The values read are:
> > + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> > + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics
> > Offloads.
> > + * wrm_offload_wait_time-- Wait Count for WAIT_REG_MEM Offloads.
> > + * gws_wait_time-- Wait Count for Global Wave Syncs.
> 

[PATCH 5/7] drm/amdgpu/vcn: update ucode setup

2023-03-28 Thread Alex Deucher
From: James Zhu 

Use common amdgpu_vcn_setup_ucode for ucode setup.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ddd844cca02e..5eaaac531ab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -87,16 +87,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
if (r)
return r;
 
-   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-   const struct common_firmware_header *hdr;
-   hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
-   adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = 
AMDGPU_UCODE_ID_VCN;
-   adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
-   adev->firmware.fw_size +=
-   ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
-
-   DRM_DEV_INFO(adev->dev, "Will use PSP to load VCN firmware\n");
-   }
+   amdgpu_vcn_setup_ucode(adev);
 
r = amdgpu_vcn_resume(adev);
if (r)
-- 
2.39.2



[PATCH 6/7] drm/amdgpu/vcn: remove unused code

2023-03-28 Thread Alex Deucher
From: James Zhu 

Remove unused code.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 121 
 1 file changed, 121 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 5eaaac531ab6..93c18fd7de77 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -416,117 +416,6 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct 
amdgpu_device *adev, bool indir
VCN, 0, regUVD_GFX10_ADDR_CONFIG), 
adev->gfx.config.gb_addr_config, 0, indirect);
 }
 
-/**
- * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
- *
- * @adev: amdgpu_device pointer
- *
- * Disable static power gating for VCN block
- */
-static void vcn_v4_0_3_disable_static_power_gating(struct amdgpu_device *adev)
-{
-   uint32_t data = 0;
-
-   if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
-   data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
-
-   WREG32_SOC15(VCN, 0, regUVD_PGFSM_CONFIG, data);
-   SOC15_WAIT_ON_RREG(VCN, 0, regUVD_PGFSM_STATUS,
-   UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 
0x3F3F);
-   } else {
-   data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
-   | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
-
-   WREG32_SOC15(VCN, 0, regUVD_PGFSM_CONFIG, data);
-   SOC15_WAIT_ON_RREG(VCN, 0, regUVD_PGFSM_STATUS, 0,  0x3F3F);
-   }
-
-   data = RREG32_SOC15(VCN, 0, regUVD_POWER_STATUS);
-   data &= ~0x103;
-   if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
-   data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
-   UVD_POWER_STATUS__UVD_PG_EN_MASK;
-
-   WREG32_SOC15(VCN, 0, regUVD_POWER_STATUS, data);
-}
-
-/**
- * vcn_v4_0_3_enable_static_power_gating - enable VCN static power gating
- *
- * @adev: amdgpu_device pointer
- *
- * Enable static power gating for VCN block
- */
-static void vcn_v4_0_3_enable_static_power_gating(struct amdgpu_device *adev)
-{
-   uint32_t data;
-
-   if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
-   /* Before power off, this indicator has to be turned on */
-   data = RREG32_SOC15(VCN, 0, regUVD_POWER_STATUS);
-   data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
-   data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
-   WREG32_SOC15(VCN, 0, regUVD_POWER_STATUS, data);
-
-   data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
-   | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
-

[PATCH 7/7] drm/amdgpu/vcn: update amdgpu_fw_shared to amdgpu_vcn4_fw_shared

2023-03-28 Thread Alex Deucher
From: James Zhu 

Use amdgpu_vcn4_fw_shared for vcn 4.0.3.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 40 +++--
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 93c18fd7de77..0b2b97593bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -79,7 +79,7 @@ static int vcn_v4_0_3_early_init(void *handle)
 static int vcn_v4_0_3_sw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   volatile struct amdgpu_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
int r;
 
@@ -111,10 +111,8 @@ static int vcn_v4_0_3_sw_init(void *handle)
return r;
 
fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
-   fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
-cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
-
cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
-   fw_shared->sw_ring.is_enabled = cpu_to_le32(true);
+   fw_shared->present_flag_0 = 0;
+   fw_shared->sq.is_enabled = cpu_to_le32(true);
 
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
@@ -135,11 +133,11 @@ static int vcn_v4_0_3_sw_fini(void *handle)
int r, idx;
 
if (drm_dev_enter(&adev->ddev, &idx)) {
-   volatile struct amdgpu_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
 
fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
-   fw_shared->sw_ring.is_enabled = cpu_to_le32(false);
+   fw_shared->sq.is_enabled = cpu_to_le32(false);
 
drm_dev_exit(idx);
}
@@ -304,7 +302,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev)
upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, 0, regUVD_VCPU_NONCACHE_SIZE0,
-   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
 }
 
 /**
@@ -407,7 +405,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct 
amdgpu_device *adev, bool indir
VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
-   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 
0, indirect);
+   AMDGPU_GPU_PAGE_ALIGN(sizeof(struct 
amdgpu_vcn4_fw_shared)), 0, indirect);
 
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
@@ -612,7 +610,7 @@ static void vcn_v4_0_3_enable_clock_gating(struct 
amdgpu_device *adev)
  */
 static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 {
-   volatile struct amdgpu_fw_shared *fw_shared = 
adev->vcn.inst->fw_shared.cpu_addr;
+   volatile struct amdgpu_vcn4_fw_shared *fw_shared = 
adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
 
@@ -702,7 +700,6 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
(uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
 
ring = &adev->vcn.inst->ring_dec;
-   fw_shared->multi_queue.decode_queue_mode |= 
cpu_to_le32(FW_QUEUE_RING_RESET);
 
/* program the RB_BASE for ring buffer */
WREG32_SOC15(VCN, 0, regUVD_RB_BASE_LO4,
@@ -728,12 +725,8 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
 
WREG32_SOC15(VCN, 0, regUVD_SCRATCH2, 0);
 
-   /* Reset FW shared memory RBC WPTR/RPTR */
-   fw_shared->rb.rptr = 0;
-   fw_shared->rb.wptr = lower_32_bits(ring->wptr);
-
/*resetting done, fw can check RB ring */
-   fw_shared->multi_queue.decode_queue_mode &= 
cpu_to_le32(~FW_QUEUE_RING_RESET);
+   fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
return 0;
 }
@@ -747,7 +740,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
  */
 static int vcn_v4_0_3_start(struct amdgpu_device *adev)
 {
-   volatile struct amdgpu_fw_shared *fw_shared;
+   volatile struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r;
@@ -878,7 +871,6 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev)
ring = &adev->vcn.inst->ring_dec;
 
fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
-   fw_shared->multi

[PATCH 4/7] drm/amdgpu/vcn: update new doorbell map

2023-03-28 Thread Alex Deucher
From: James Zhu 

New doorbell map is used for VCN 4.0.3.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 8914f3c6c80f..e12e3646c49a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -102,7 +102,7 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = &adev->jpeg.inst->ring_dec[i];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(0);
-   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 
1) + (i?8:1) + i;
+   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 
1) + 1 + i;
sprintf(ring->name, "jpeg_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 27f456f32f82..40e694932a20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -161,7 +161,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct 
amdgpu_device *adev, bool use_do
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
-   0x10);
+   0x9);
 
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -174,7 +174,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct 
amdgpu_device *adev, bool use_do
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
-   S2A_DOORBELL_PORT1_RANGE_SIZE, 0x10);
+   S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index fafce2beb6cf..ddd844cca02e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -110,7 +110,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
ring = &adev->vcn.inst->ring_dec;
ring->use_doorbell = true;
-   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 5;
+   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1);
ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
@@ -176,7 +176,7 @@ static int vcn_v4_0_3_hw_init(void *handle)
int r;
 
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 
ring->me);
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
if (ring->use_doorbell)
WREG32_SOC15(VCN, ring->me, regVCN_RB4_DB_CTRL,
ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
-- 
2.39.2



[PATCH 1/7] drm/amdgpu/vcn: use vcn4 irqsrc header for VCN 4.0.3

2023-03-28 Thread Alex Deucher
From: James Zhu 

Use vcn4 irqsrc header for VCN 4.0.3.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ae2cc47d344a..fafce2beb6cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -35,7 +35,7 @@
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
-#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
 
 #define mmUVD_DPG_LMA_CTL  regUVD_DPG_LMA_CTL
 #define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
@@ -104,7 +104,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
-   VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, 
&adev->vcn.inst->irq);
+   VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst->irq);
if (r)
return r;
 
@@ -1380,7 +1380,7 @@ static int vcn_v4_0_3_process_interrupt(struct 
amdgpu_device *adev,
DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
 
switch (entry->src_id) {
-   case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+   case VCN_4_0__SRCID__UVD_TRAP:
amdgpu_fence_process(&adev->vcn.inst->ring_dec);
break;
default:
-- 
2.39.2



[PATCH 3/7] drm/amdgpu/jpeg: update jpeg header to support multiple AIDs

2023-03-28 Thread Alex Deucher
From: James Zhu 

Add aid_id in jpeg header to support multiple AIDs.

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index cb6c127ab81d..5c200a508fa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -40,6 +40,7 @@ struct amdgpu_jpeg_inst {
struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
struct amdgpu_irq_src irq;
struct amdgpu_jpeg_reg external;
+   uint8_t aid_id;
 };
 
 struct amdgpu_jpeg_ras {
@@ -58,6 +59,7 @@ struct amdgpu_jpeg {
atomic_t total_submission_cnt;
struct ras_common_if*ras_if;
struct amdgpu_jpeg_ras  *ras;
+   uint8_t num_inst_per_aid;
 };
 
 int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
-- 
2.39.2



[PATCH 2/7] drm/amdgpu/vcn: update vcn header to support multiple AIDs

2023-03-28 Thread Alex Deucher
From: James Zhu 

Add aid_id in vcn header to support multiple AIDs

Signed-off-by: James Zhu 
Acked-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index c730949ece7d..1024a06359ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -242,6 +242,7 @@ struct amdgpu_vcn_inst {
uint32_t*dpg_sram_curr_addr;
atomic_tdpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared;
+   uint8_t aid_id;
 };
 
 struct amdgpu_vcn_ras {
@@ -271,6 +272,8 @@ struct amdgpu_vcn {
 
struct ras_common_if*ras_if;
struct amdgpu_vcn_ras   *ras;
+
+   uint8_t num_inst_per_aid;
 };
 
 struct amdgpu_fw_shared_rb_ptrs_struct {
-- 
2.39.2



RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for unmap queues

2023-03-28 Thread Russell, Kent
[AMD Official Use Only - General]

3 tiny grammar/spelling things inline (not critical)

 Kent

> -Original Message-
> From: amd-gfx  On Behalf Of
> Jonathan Kim
> Sent: Monday, March 27, 2023 2:43 PM
> To: amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> 
> Subject: [PATCH 12/34] drm/amdgpu: add configurable grace period for unmap
> queues
> 
> The HWS scheduler allows a grace period for wave completion prior to
> preemption for better performance by avoiding CWSR on waves that can
> potentially complete quickly. The debugger, on the other hand, will
> want to inspect wave status immediately after it actively triggers
> preemption (a suspend function to be provided).
> 
> To minimize latency between preemption and debugger wave inspection, allow
> immediate preemption by setting the grace period to 0.
> 
> Note that setting the preemption grace period to 0 will result in an
> infinite grace period being set due to a CP FW bug so set it to 1 for now.
> 
> v2: clarify purpose in the description of this patch
> 
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +
>  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 43 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  6 ++
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  9 ++-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 62 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 39 +++
>  .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   | 65 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 ++
>  13 files changed, 291 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index a6f98141c29c..b811a0985050 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
>   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>   .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
>   .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
> + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> + .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
>   .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings,
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index d2918e5c0dea..a62bd0068515 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> 
>   kgd_gfx_v9_set_vm_context_page_table_base,
>   .enable_debug_trap = kgd_arcturus_enable_debug_trap,
>   .disable_debug_trap = kgd_arcturus_disable_debug_trap,
> + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> + .build_grace_period_packet_info =
> kgd_gfx_v9_build_grace_period_packet_info,
>   .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>   .program_trap_handler_settings =
> kgd_gfx_v9_program_trap_handler_settings
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 969015281510..605387e55d33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -802,6 +802,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct
> amdgpu_device *adev,
>   return 0;
>  }
> 
> +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2
> values
> + * The values read are:
> + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics
> Offloads.
> + * wrm_offload_wait_time-- Wait Count for WAIT_REG_MEM Offloads.
> + * gws_wait_time-- Wait Count for Global Wave Syncs.
> + * que_sleep_wait_time  -- Wait Count for Dequeue Retry.
> + * sch_wave_wait_time   -- Wait Count for Scheduling Wave Message.
> + * sem_rearm_wait_time  -- Wait Count for Semaphore re-arm.
> + * deq_retry_wait_time  -- Wait Count for Global Wave Syncs.
> + */
> +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
> + uint32_t *wait_times)
> +
> +{
> + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0,
> mmCP_IQ_WAIT_TIME2));
> +}
> +
> +void kgd_gfx_v10_build_grace_period

[PATCH 6/6] drm/amdgpu: Change num_xcd to xcc_mask

2023-03-28 Thread Alex Deucher
From: Lijo Lazar 

Instead of number of XCCs, keep a mask of XCCs for the exact XCCs
available on the ASIC. XCC configuration could differ based on
different ASIC configs.

v2:
Rename num_xcd to num_xcc (Hawking)
Use smaller xcc_mask size, changed to u16 (Le)

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  |  21 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   4 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|   2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  | 133 +--
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c |  67 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c  |  10 +-
 7 files changed, 141 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 77a42f73c3de..2a781d3dcc05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -209,12 +209,12 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 adev->gfx.mec.num_queue_per_pipe,
 adev->gfx.num_compute_rings);
-   int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
+   int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 
if (multipipe_policy) {
/* policy: make queues evenly cross all pipes on MEC1 only
 * for multiple xcc, just use the original policy for 
simplicity */
-   for (j = 0; j < num_xcd; j++) {
+   for (j = 0; j < num_xcc; j++) {
for (i = 0; i < max_queues_per_mec; i++) {
pipe = i % adev->gfx.mec.num_pipe_per_mec;
queue = (i / adev->gfx.mec.num_pipe_per_mec) %
@@ -226,13 +226,13 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
}
} else {
/* policy: amdgpu owns all queues in the given pipe */
-   for (j = 0; j < num_xcd; j++) {
+   for (j = 0; j < num_xcc; j++) {
for (i = 0; i < max_queues_per_mec; ++i)
set_bit(i, 
adev->gfx.mec_bitmap[j].queue_bitmap);
}
}
 
-   for (j = 0; j < num_xcd; j++) {
+   for (j = 0; j < num_xcc; j++) {
dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES));
}
@@ -1129,23 +1129,24 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct 
device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
enum amdgpu_gfx_partition mode;
-   int ret;
+   int ret = 0, num_xcc;
 
-   if (adev->gfx.num_xcd % 2 != 0)
+   num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+   if (num_xcc % 2 != 0)
return -EINVAL;
 
if (!strncasecmp("SPX", buf, strlen("SPX"))) {
mode = AMDGPU_SPX_PARTITION_MODE;
} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
-   if (adev->gfx.num_xcd != 4 || adev->gfx.num_xcd != 8)
+   if (num_xcc != 4 || num_xcc != 8)
return -EINVAL;
mode = AMDGPU_DPX_PARTITION_MODE;
} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
-   if (adev->gfx.num_xcd != 6)
+   if (num_xcc != 6)
return -EINVAL;
mode = AMDGPU_TPX_PARTITION_MODE;
} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
-   if (adev->gfx.num_xcd != 8)
+   if (num_xcc != 8)
return -EINVAL;
mode = AMDGPU_QPX_PARTITION_MODE;
} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
@@ -1175,7 +1176,7 @@ static ssize_t 
amdgpu_gfx_get_available_compute_partition(struct device *dev,
char *supported_partition;
 
/* TBD */
-   switch (adev->gfx.num_xcd) {
+   switch (NUM_XCC(adev->gfx.xcc_mask)) {
case 8:
supported_partition = "SPX, DPX, QPX, CPX";
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0cf3591c13b2..8741f8c30ce0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -64,6 +64,8 @@ enum amdgpu_gfx_partition {
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE,
 };
 
+#define NUM_XCC(x) hweight16(x)
+
 struct amdgpu_mec {
struct amdgpu_bo*hpd_eop_obj;
u64 hpd_eop_gpu_addr;
@@ -385,7 +387,7 @@ struct amdgpu_gfx {
struct amdgpu_ring_mux  muxer;
 

[PATCH 5/6] drm/amdgpu: add the support of XGMI link for GC 9.4.3

2023-03-28 Thread Alex Deucher
From: Shiwu Zhang 

Add the xgmi LFB_CNTL/LFB_SIZE reg addresses to fetch the xgmi info from.

v2: move get_xgmi_info() to GC_V9_4_3 specific source files to utilize
the register definitions specific for GC_V9_4_3
v3: remove the duplicated register definitions
v4: enable xgmi based on asic_type as XGMI_IP ver is not available
yet for IP discovery

Signed-off-by: Shiwu Zhang 
Reviewed-by: Le Ma 
Acked-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 41 +++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 10 --
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index d3424ce97aa8..1bb17d95f720 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -528,6 +528,45 @@ static void gfxhub_v1_2_init(struct amdgpu_device *adev)
}
 }
 
+static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
+{
+   u32 max_num_physical_nodes;
+   u32 max_physical_node_id;
+   u32 xgmi_lfb_cntl;
+   u32 max_region;
+   u64 seg_size;
+
+   xgmi_lfb_cntl = RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_CNTL);
+   seg_size = REG_GET_FIELD(
+   RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_SIZE),
+   MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+   max_region =
+   REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, 
PF_MAX_REGION);
+
+
+
+   max_num_physical_nodes   = 8;
+   max_physical_node_id = 7;
+
+   /* PF_MAX_REGION=0 means xgmi is disabled */
+   if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+   adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+   if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+   return -EINVAL;
+
+   adev->gmc.xgmi.physical_node_id =
+   REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+   PF_LFB_REGION);
+
+   if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+   return -EINVAL;
+
+   adev->gmc.xgmi.node_segment_size = seg_size;
+   }
+
+   return 0;
+}
 
 const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
@@ -536,5 +575,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.gart_disable = gfxhub_v1_2_gart_disable,
.set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
.init = gfxhub_v1_2_init,
-   .get_xgmi_info = gfxhub_v1_1_get_xgmi_info,
+   .get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 44573689a5ba..7d2d1d4a6e38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1402,9 +1402,13 @@ static int gmc_v9_0_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-   /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables 
*/
-   if (adev->asic_type == CHIP_VEGA20 ||
-   adev->asic_type == CHIP_ARCTURUS)
+   /*
+* 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+* in their IP discovery tables
+*/
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) ||
+   adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+   adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
adev->gmc.xgmi.supported = true;
 
if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
-- 
2.39.2



[PATCH 2/6] drm/amdkfd: Add SDMA info for SDMA 4.4.2

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Update SDMA queue information for SDMA 4.4.2.

Signed-off-by: Mukul Joshi 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9ed32c1b2f5b..cd1aa711dd0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -82,6 +82,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 2, 0):/* VEGA20 */
case IP_VERSION(4, 2, 2):/* ARCTURUS */
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+   case IP_VERSION(4, 4, 2):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
-- 
2.39.2



[PATCH 3/6] drm/amdkfd: Populate memory info before adding GPU node to topology

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

The local memory info needs to be fetched before the GPU node is added
to topology. Without this, the sysfs is incorrectly populated and the
size is reported as 0. This was causing rocr tests to fail. This issue
was caused by a bad merge.

Signed-off-by: Mukul Joshi 
Reviewed-by: Amber Lin 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index cd1aa711dd0b..1b4f3d34731b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -735,6 +735,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
node->max_proc_per_quantum = max_proc_per_quantum;
atomic_set(&node->sram_ecc_flag, 0);
 
+   amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
+
dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
kfd->num_nodes);
for (i = 0; i < kfd->num_nodes; i++) {
@@ -785,8 +787,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (kfd_resume_iommu(kfd))
goto kfd_resume_iommu_error;
 
-   amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
-
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
 kfd->adev->pdev->device);
-- 
2.39.2



[PATCH 1/6] drm/amdkfd: Fix SDMA in CPX mode

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

When creating a user-mode SDMA queue, CP FW expects
driver to use/set virtual SDMA engine id in MAP_QUEUES
packet instead of using the physical SDMA engine id.
Each partition node's virtual SDMA number should start
from 0. However, when allocating doorbell for the queue,
KFD needs to allocate the doorbell from doorbell space
corresponding to the physical SDMA engine id, otherwise
the hardware will not see the doorbell press.

Signed-off-by: Mukul Joshi 
Reviewed-by: Amber Lin 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 62838d84b6f1..527a2e0eef81 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -363,7 +363,16 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd,
 */
 
uint32_t *idx_offset = 
dev->kfd->shared_resources.sdma_doorbell_idx;
-   uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+
+   /*
+* q->properties.sdma_engine_id corresponds to the virtual
+* sdma engine number. However, for doorbell allocation,
+* we need the physical sdma engine id in order to get the
+* correct doorbell offset.
+*/
+   uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
+  
get_num_all_sdma_engines(qpd->dqm) +
+  q->properties.sdma_engine_id]
+ (q->properties.sdma_queue_id 
& 1)
* 
KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ (q->properties.sdma_queue_id 
>> 1);
@@ -1388,7 +1397,6 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
}
 
q->properties.sdma_engine_id =
-   dqm->dev->node_id * get_num_all_sdma_engines(dqm) +
q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_sdma_engines(dqm->dev);
@@ -1418,7 +1426,6 @@ static int allocate_sdma_queue(struct 
device_queue_manager *dqm,
 * PCIe-optimized ones
 */
q->properties.sdma_engine_id =
-   dqm->dev->node_id * get_num_all_sdma_engines(dqm) +
kfd_get_num_sdma_engines(dqm->dev) +
q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
@@ -2486,6 +2493,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0, xcc;
uint32_t inst;
+   uint32_t sdma_engine_start;
 
if (!dqm->sched_running) {
seq_puts(m, " Device is stopped\n");
@@ -2530,7 +2538,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
}
}
 
-   for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
+   sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+   for (pipe = sdma_engine_start;
+pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
+pipe++) {
for (queue = 0;
 queue < 
dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
 queue++) {
-- 
2.39.2



[PATCH 4/6] drm/amdgpu: add new vram type for dgpu

2023-03-28 Thread Alex Deucher
From: Hawking Zhang 

hbm3 will be supported in some dgpu program

Signed-off-by: Hawking Zhang 
Reviewed-by: Lijo Lazar 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 1 +
 drivers/gpu/drm/amd/include/atomfirmware.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index ac6fe0ae4609..ef4b9a41f20a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -272,6 +272,7 @@ static int convert_atom_mem_type_to_vram_type(struct 
amdgpu_device *adev,
break;
case ATOM_DGPU_VRAM_TYPE_HBM2:
case ATOM_DGPU_VRAM_TYPE_HBM2E:
+   case ATOM_DGPU_VRAM_TYPE_HBM3:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
case ATOM_DGPU_VRAM_TYPE_GDDR6:
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h 
b/drivers/gpu/drm/amd/include/atomfirmware.h
index bbe1337a8cee..e68c1e280322 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -182,6 +182,7 @@ enum atom_dgpu_vram_type {
   ATOM_DGPU_VRAM_TYPE_HBM2  = 0x60,
   ATOM_DGPU_VRAM_TYPE_HBM2E = 0x61,
   ATOM_DGPU_VRAM_TYPE_GDDR6 = 0x70,
+  ATOM_DGPU_VRAM_TYPE_HBM3 = 0x80,
 };
 
 enum atom_dp_vs_preemph_def{
-- 
2.39.2



[PATCH 20/32] drm/amdgpu: Set XNACK per process on GC 9.4.3

2023-03-28 Thread Alex Deucher
From: Amber Lin 

Set RETRY_PERMISSION_OR_INVALID_PAGE_FAULT bit in VM_CONTEXT1_CNTL
as well so XNACK can be enabled in the SQ per process.

Signed-off-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 6 --
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c  | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index e5016fea1f28..d74621662311 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -328,13 +328,15 @@ static void gfxhub_v1_2_setup_vmid_config(struct 
amdgpu_device *adev)
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault 
storm.
-* On Aldebaran, XNACK can be enabled in the SQ 
per-process.
+* On 9.4.2 and 9.4.3, XNACK can be enabled in
+* the SQ per-process.
 * Retry faults need to be enabled for that to work.
 */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,

RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry ||
-   adev->asic_type == CHIP_ALDEBARAN);
+   adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2) ||
+   adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 3));
WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, j,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 9ec06f9db761..3883758b7993 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -288,7 +288,7 @@ static void mmhub_v1_8_setup_vmid_config(struct 
amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
-   /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+   /* On 9.4.3, XNACK can be enabled in the SQ per-process.
 * Retry faults need to be enabled for that to work.
 */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-- 
2.39.2



[PATCH 28/32] drm/amdgpu: set mmhub bitmask for multiple AIDs

2023-03-28 Thread Alex Deucher
From: Le Ma 

Like GFXHUB, set MMHUB0 bitmask for each AID.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index cb8ea1f5bc44..44573689a5ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1713,7 +1713,7 @@ static int gmc_v9_0_sw_init(void *handle)
break;
case IP_VERSION(9, 4, 3):
bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), 
adev->gfx.num_xcd);
-   bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), 1);
+   bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid);
 
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
break;
-- 
2.39.2



[PATCH 25/32] drm/amdgpu: do mmhub init for multiple AIDs

2023-03-28 Thread Alex Deucher
From: Le Ma 

MMHUB on each AID needs to be initialized individually

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 609 ++--
 1 file changed, 348 insertions(+), 261 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 3883758b7993..67338cb3d7bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -53,18 +53,27 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device 
*adev)
 static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t 
vmid,
uint64_t page_table_base)
 {
-   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
-
-   WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
-   hub->ctx_addr_distance * vmid, 
lower_32_bits(page_table_base));
+   struct amdgpu_vmhub *hub;
+   int i;
 
-   WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
-   hub->ctx_addr_distance * vmid, 
upper_32_bits(page_table_base));
+   for (i = 0; i < adev->num_aid; i++) {
+   hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+   WREG32_SOC15_OFFSET(MMHUB, i,
+   regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+   hub->ctx_addr_distance * vmid,
+   lower_32_bits(page_table_base));
+
+   WREG32_SOC15_OFFSET(MMHUB, i,
+   regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+   hub->ctx_addr_distance * vmid,
+   upper_32_bits(page_table_base));
+   }
 }
 
 static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
 {
uint64_t pt_base;
+   int i;
 
if (adev->gmc.pdb0_bo)
pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
@@ -76,27 +85,37 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct 
amdgpu_device *adev)
/* If use GART for FB translation, vmid0 page table covers both
 * vram and system memory (gart)
 */
-   if (adev->gmc.pdb0_bo) {
-   WREG32_SOC15(MMHUB, 0, 
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-(u32)(adev->gmc.fb_start >> 12));
-   WREG32_SOC15(MMHUB, 0, 
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-(u32)(adev->gmc.fb_start >> 44));
-
-   WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-(u32)(adev->gmc.gart_end >> 12));
-   WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-(u32)(adev->gmc.gart_end >> 44));
-
-   } else {
-   WREG32_SOC15(MMHUB, 0, 
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-(u32)(adev->gmc.gart_start >> 12));
-   WREG32_SOC15(MMHUB, 0, 
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-(u32)(adev->gmc.gart_start >> 44));
-
-   WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-(u32)(adev->gmc.gart_end >> 12));
-   WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-(u32)(adev->gmc.gart_end >> 44));
+   for (i = 0; i < adev->num_aid; i++) {
+   if (adev->gmc.pdb0_bo) {
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+(u32)(adev->gmc.fb_start >> 12));
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+(u32)(adev->gmc.fb_start >> 44));
+
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+(u32)(adev->gmc.gart_end >> 12));
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+(u32)(adev->gmc.gart_end >> 44));
+
+   } else {
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+(u32)(adev->gmc.gart_start >> 12));
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+(u32)(adev->gmc.gart_start >> 44));
+
+   WREG32_SOC15(MMHUB, i,
+regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+(u32)(adev

[PATCH 29/32] drm/amdgpu: convert the doorbell_index to 2 dwords offset for kiq

2023-03-28 Thread Alex Deucher
From: Le Ma 

KIQ doorbell_index is non-zero from XCC1, thus need to left-shift it like
other rings.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 5ef6fbe354c9..d48f80469533 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -313,14 +313,13 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
-   ring->doorbell_index = adev->doorbell_index.kiq;
ring->xcc_id = xcc_id;
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
if (xcc_id >= 1)
-   ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start +
-   xcc_id - 1;
+   ring->doorbell_index = (adev->doorbell_index.xcc1_kiq_start +
+   xcc_id - 1) << 1;
else
-   ring->doorbell_index = adev->doorbell_index.kiq;
+   ring->doorbell_index = adev->doorbell_index.kiq << 1;
 
r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
-- 
2.39.2



[PATCH 31/32] drm/amdgpu: introduce new doorbell assignment table for GC 9.4.3

2023-03-28 Thread Alex Deucher
From: Le Ma 

Four basic reasons as below to do the change:
  1. the number of rings expands a lot on GC 9.4.3, and adjusting the old
 assignment cannot keep each ring in a continuous doorbell space.
  2. the SDMA doorbell index should not exceed 0x1FF on SDMA 4.2.2 due to
 regDOORBELLx_CTRL_ENTRY.BIF_DOORBELLx_RANGE_OFFSET_ENTRY field width.
  3. re-design the doorbell assignment and unify the calculation as
 "start + ring/inst id" will make the code much concise.
  4. only defining the START/END makes the table look simple

v2: (Lijo)
  1. replace name
  2. use num_inst_per_aid/sdma_doorbell_range instead of hardcoding

Signed-off-by: Le Ma 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 32 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  |  6 +---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  8 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 19 ++--
 4 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 613bc035f2e4..93cbb307db93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -54,7 +54,7 @@ struct amdgpu_doorbell_index {
uint32_t gfx_ring1;
uint32_t gfx_userqueue_start;
uint32_t gfx_userqueue_end;
-   uint32_t sdma_engine[8];
+   uint32_t sdma_engine[16];
uint32_t mes_ring0;
uint32_t mes_ring1;
uint32_t ih;
@@ -78,9 +78,6 @@ struct amdgpu_doorbell_index {
};
uint32_t first_non_cp;
uint32_t last_non_cp;
-   uint32_t xcc1_kiq_start;
-   uint32_t xcc1_mec_ring0_start;
-   uint32_t aid1_sdma_start;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
@@ -307,6 +304,33 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_INVALID = 0x
 } AMDGPU_DOORBELL64_ASSIGNMENT;
 
+typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1
+{
+   /* KIQ: 0~7 for maximum 8 XCD */
+   AMDGPU_DOORBELL_LAYOUT1_KIQ_START   = 0x000,
+   AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x008,
+   AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x009,
+   /* Compute: 0x0A ~ 0x49 */
+   AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START  = 0x00A,
+   AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END= 0x049,
+   AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x04A,
+   AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END   = 0x0C9,
+   /* SDMA: 0x100 ~ 0x19F */
+   AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START   = 0x100,
+   AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+   /* IH: 0x1A0 ~ 0x1AF */
+   AMDGPU_DOORBELL_LAYOUT1_IH  = 0x1A0,
+   /* VCN: 0x1B0 ~ 0x1C2 */
+   AMDGPU_DOORBELL_LAYOUT1_VCN_START   = 0x1B0,
+   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1C2,
+
+   AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= 
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+   AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = 
AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1C2,
+   AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x
+} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
+
 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index d48f80469533..77a42f73c3de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -315,11 +315,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->use_doorbell = true;
ring->xcc_id = xcc_id;
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
-   if (xcc_id >= 1)
-   ring->doorbell_index = (adev->doorbell_index.xcc1_kiq_start +
-   xcc_id - 1) << 1;
-   else
-   ring->doorbell_index = adev->doorbell_index.kiq << 1;
+   ring->doorbell_index = (adev->doorbell_index.kiq + xcc_id) << 1;
 
r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index bd375e472823..d103832630f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -822,13 +822,7 @@ static int gfx_v9_4_3_compute_ring_init(struct 
amdgpu_device *adev, int ring_id,
 
ring->ring_obj = NULL;
ring->use_doorbell = true;
-   if (xcc_id >= 1)
-   ring->doorbell_index =
-   (adev->doorbell_index.xcc1_mec_ring0_start +

[PATCH 12/32] drm/amdkfd: Update context save handling for multi XCC setup (v2)

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Context save handling needs to be updated for a multi XCC
setup:
- On a multi XCC setup, KFD needs to report context save base
  address and size for each XCC in MQD.
- Thunk will allocate a large context save area covering all
  XCCs which will be equal to: num_of_xccs in a partition * size
  of context save area for 1 XCC. However, it will report only the
  size of context save area for 1 XCC only in the ioctl call.
- Driver then setups the MQD correctly using the size passed from
  Thunk and information about number of XCCs in a partition.
- Update get_wave_state function to return context save area
  for all XCCs in the partition.

v2: update the get_wave_state function for mqd manager v11 (Morris)

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Morris Zhang 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |  1 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  1 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  1 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 62 ++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  1 +
 6 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a49ac19ca12e..07a04c41e92a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2095,8 +2095,8 @@ static int get_wave_state(struct device_queue_manager 
*dqm,
 * and the queue should be protected against destruction by the process
 * lock.
 */
-   return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
-   ctl_stack_used_size, save_area_used_size);
+   return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
+   ctl_stack, ctl_stack_used_size, save_area_used_size);
 }
 
 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index eb18be74f559..23158db7da03 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -97,6 +97,7 @@ struct mqd_manager {
uint32_t queue_id);
 
int (*get_wave_state)(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
  void __user *ctl_stack,
  u32 *ctl_stack_used_size,
  u32 *save_area_used_size);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d54c6fdebbb6..772c09b5821b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -227,6 +227,7 @@ static uint32_t read_doorbell_id(void *mqd)
 }
 
 static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
  void __user *ctl_stack,
  u32 *ctl_stack_used_size,
  u32 *save_area_used_size)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index e7acde3a849b..31f7732166fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -253,6 +253,7 @@ static uint32_t read_doorbell_id(void *mqd)
 }
 
 static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
  void __user *ctl_stack,
  u32 *ctl_stack_used_size,
  u32 *save_area_used_size)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 89f8ba8a127c..09083e905fee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -294,6 +294,7 @@ static uint32_t read_doorbell_id(void *mqd)
 }
 
 static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
  void __user *ctl_stack,
  u32 *ctl_stack_used_size,
  u32 *save_area_used_size)
@@ -560,6 +561,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void 
**mqd,
int xcc = 0;
struct kfd_mem_obj xcc_mqd_mem_obj;
uint64_t xcc_gart_addr = 0;
+   uint64_t xcc_ctx_save_restore_area_address;
uint64_t offset = mm->mqd_stride(mm, q);
 
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
@@ -569,6 +571,23 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void 
*

[PATCH 23/32] drm/amdgpu: add support for SDMA on multiple AIDs

2023-03-28 Thread Alex Deucher
From: Le Ma 

Initialize SDMA instances on each AID.

v2: revise coding fault in hw_fini

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  8 
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 67975dcede5d..632b77138fe4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -37,6 +37,14 @@ enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE5,
AMDGPU_SDMA_IRQ_INSTANCE6,
AMDGPU_SDMA_IRQ_INSTANCE7,
+   AMDGPU_SDMA_IRQ_INSTANCE8,
+   AMDGPU_SDMA_IRQ_INSTANCE9,
+   AMDGPU_SDMA_IRQ_INSTANCE10,
+   AMDGPU_SDMA_IRQ_INSTANCE11,
+   AMDGPU_SDMA_IRQ_INSTANCE12,
+   AMDGPU_SDMA_IRQ_INSTANCE13,
+   AMDGPU_SDMA_IRQ_INSTANCE14,
+   AMDGPU_SDMA_IRQ_INSTANCE15,
AMDGPU_SDMA_IRQ_LAST
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 441d6911fd20..7deadea03caa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1253,9 +1253,10 @@ static int sdma_v4_4_2_sw_init(void *handle)
struct amdgpu_ring *ring;
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   u32 aid_id;
 
/* SDMA trap event */
-   for (i = 0; i < adev->sdma.num_instances; i++) {
+   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
  SDMA0_4_0__SRCID__SDMA_TRAP,
  &adev->sdma.trap_irq);
@@ -1264,7 +1265,7 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA SRAM ECC event */
-   for (i = 0; i < adev->sdma.num_instances; i++) {
+   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
  SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
  &adev->sdma.ecc_irq);
@@ -1273,7 +1274,7 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
-   for (i = 0; i < adev->sdma.num_instances; i++) {
+   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
  SDMA0_4_0__SRCID__SDMA_VM_HOLE,
  &adev->sdma.vm_hole_irq);
@@ -1303,15 +1304,17 @@ static int sdma_v4_4_2_sw_init(void *handle)
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
+   aid_id = adev->sdma.instance[i].aid_id;
 
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
 
/* doorbell size is 2 dwords, get DWORD offset */
ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
-   ring->vm_hub = AMDGPU_MMHUB0(0);
+   ring->vm_hub = AMDGPU_MMHUB0(aid_id);
 
-   sprintf(ring->name, "sdma%d", i);
+   sprintf(ring->name, "sdma%d.%d", aid_id,
+   i % adev->sdma.num_inst_per_aid);
r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
 AMDGPU_SDMA_IRQ_INSTANCE0 + i,
 AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -1327,9 +1330,10 @@ static int sdma_v4_4_2_sw_init(void *handle)
 * gfx queue on the same instance
 */
ring->doorbell_index = 
(adev->doorbell_index.sdma_engine[i] + 1) << 1;
-   ring->vm_hub = AMDGPU_MMHUB0(0);
+   ring->vm_hub = AMDGPU_MMHUB0(aid_id);
 
-   sprintf(ring->name, "page%d", i);
+   sprintf(ring->name, "page%d.%d", aid_id,
+   i % adev->sdma.num_inst_per_aid);
r = amdgpu_ring_init(adev, ring, 1024,
 &adev->sdma.trap_irq,
 AMDGPU_SDMA_IRQ_INSTANCE0 + i,
@@ -1811,6 +1815,8 @@ static void sdma_v4_4_2_set_ring_funcs(struct 
amdgpu_device *adev)
&sdma_v4_4_2_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
+
+   adev->sdma.instance[i].aid_id = i / adev->sdma.num_inst_per_aid;
}
 }
 
-- 
2.39.2



[PATCH 22/32] drm/amdgpu: adjust some basic elements for multiple AID case

2023-03-28 Thread Alex Deucher
From: Le Ma 

add some elements below:
 - num_aid
 - aid_id for each sdma instance
 - num_inst_per_aid for sdma

and extend macro size below:
 - SDMA_MAX_INSTANCES to 16
 - AMDGPU_MAX_RINGS to 96
 - AMDGPU_MAX_HWIP_RINGS to 32

v2: move aid_id from amdgpu_ring to amdgpu_sdma_instance. (Lijo)

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 4 +++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 786b28821324..ca9c4d31e352 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1072,6 +1072,7 @@ struct amdgpu_device {
 
booljob_hang;
booldc_enabled;
+   uint32_tnum_aid;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 72a7afb852d0..f4467e5cf882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -36,8 +36,8 @@ struct amdgpu_job;
 struct amdgpu_vm;
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS   28
-#define AMDGPU_MAX_HWIP_RINGS  8
+#define AMDGPU_MAX_RINGS   96
+#define AMDGPU_MAX_HWIP_RINGS  32
 #define AMDGPU_MAX_GFX_RINGS   2
 #define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index fc8528812598..67975dcede5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -26,7 +26,7 @@
 #include "amdgpu_ras.h"
 
 /* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES  8
+#define AMDGPU_MAX_SDMA_INSTANCES  16
 
 enum amdgpu_sdma_irq {
AMDGPU_SDMA_IRQ_INSTANCE0  = 0,
@@ -49,6 +49,7 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring  ring;
struct amdgpu_ring  page;
boolburst_nop;
+   uint32_taid_id;
 };
 
 struct amdgpu_sdma_ras {
@@ -66,6 +67,7 @@ struct amdgpu_sdma {
struct amdgpu_irq_src   srbm_write_irq;
 
int num_instances;
+   int num_inst_per_aid;
uint32_tsrbm_soft_reset;
boolhas_page_queue;
struct ras_common_if*ras_if;
-- 
2.39.2



[PATCH 32/32] drm/amdkfd: add gpu compute cores io links for gfx9.4.3

2023-03-28 Thread Alex Deucher
From: Jonathan Kim 

The PSP TA will only provide xGMI topology info for links between GPU
sockets so links between partitions from different sockets will be
hardcoded as 3 xGMI hops with 1 hops weighted as xGMI and 2 hops
weighted with a new intra-socket weight to indicate the longest
possible distance.

If the link between a partition and the CPU is non-PCIe, then assume
the CPU (CCDs) is located within the same socket as the partition
and represent the link as an intra-socket weighted single hop XGMI link
with memory bandwidth.

Links between partitions within a single socket will be abstracted as
single hop xGMI links weighted with the new intra-socket weight and
will have memory bandwidth.

Finally, use the unused function bits in the location ID to represent the
coordinates of the compute partition within its socket.

A follow on patch will resolve the requirement for GPU socket xGMI
link representation sometime later.

Signed-off-by: Jonathan Kim 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 49 ---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |  8 
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  4 ++
 4 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index f5aebba31e88..dc93a67257e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1166,7 +1166,7 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
-   props->weight = 15 * iolink->num_hops_xgmi;
+   props->weight = iolink->weight_xgmi;
else
props->weight = node_distance(id_from, id_to);
 
@@ -1972,6 +1972,9 @@ static void kfd_find_numa_node_in_srat(struct kfd_node 
*kdev)
 }
 #endif
 
+#define KFD_CRAT_INTRA_SOCKET_WEIGHT   13
+#define KFD_CRAT_XGMI_WEIGHT   15
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  * @avail_size: Available size in the memory
@@ -2003,6 +2006,12 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
 * TODO: Fill-in other fields of iolink subtype
 */
if (kdev->adev->gmc.xgmi.connected_to_cpu) {
+   bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
+   int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
+   
KFD_CRAT_INTRA_SOCKET_WEIGHT;
+   uint32_t bandwidth = ext_cpu ? 
amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
+   kdev->adev, NULL, true) 
: mem_bw;
+
/*
 * with host gpu xgmi link, host can access gpu memory whether
 * or not pcie bar type is large, so always create bidirectional
@@ -2010,14 +2019,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
 */
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
-   sub_type_hdr->num_hops_xgmi = 1;
-   if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
-   sub_type_hdr->minimum_bandwidth_mbs =
-   amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
-   kdev->adev, NULL, true);
-   sub_type_hdr->maximum_bandwidth_mbs =
-   sub_type_hdr->minimum_bandwidth_mbs;
-   }
+   sub_type_hdr->weight_xgmi = weight;
+   sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
+   sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2050,6 +2054,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
 {
+   bool use_ta_info = kdev->kfd->num_nodes == 1;
+
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -2064,12 +2070,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int 
*avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
-   sub_type_hdr->num_hops_xgmi =
-   

[PATCH 21/32] drm/amdgpu: assign the doorbell index in 1st page to sdma page queue

2023-03-28 Thread Alex Deucher
From: Le Ma 

Previously for vega10, the sdma_doorbell_range was only enough for the sdma
gfx queue, so an index on the second doorbell page was allocated for the sdma
page queue. From vega20, the sdma_doorbell_range on the 1st page is enlarged.
Therefore, just leverage these indexes instead of allocating on the 2nd page.

v2: change "(x << 1) + 2" to "(x + 1) << 1" for readability and add comments.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 14 --
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c   |  7 +++
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1c657273d9ee..518e89a7d9ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1090,7 +1090,8 @@ static int amdgpu_device_doorbell_init(struct 
amdgpu_device *adev)
 * doorbells are in the first page. So with paging queue 
enabled,
 * the max num_doorbells should + 1 page (0x400 in dword)
 */
-   if (adev->asic_type >= CHIP_VEGA10)
+   if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
+   adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
adev->doorbell.num_doorbells += 0x400;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index dfe91ebc7b43..68c6d0c8fe76 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1847,8 +1847,18 @@ static int sdma_v4_0_sw_init(void *handle)
/* paging queue use same doorbell index/routing as gfx 
queue
 * with 0x400 (4096 dwords) offset on second doorbell 
page
 */
-   ring->doorbell_index = 
adev->doorbell_index.sdma_engine[i] << 1;
-   ring->doorbell_index += 0x400;
+   if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 
0, 0) &&
+   adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 
0)) {
+   ring->doorbell_index =
+   adev->doorbell_index.sdma_engine[i] << 
1;
+   ring->doorbell_index += 0x400;
+   } else {
+   /* From vega20, the sdma_doorbell_range in 1st
+* doorbell page is reserved for page queue.
+*/
+   ring->doorbell_index =
+   (adev->doorbell_index.sdma_engine[i] + 
1) << 1;
+   }
 
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 
2, 2) && i >= 5)
ring->vm_hub = AMDGPU_MMHUB1(0);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 7efe7c43fffb..441d6911fd20 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1323,11 +1323,10 @@ static int sdma_v4_4_2_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
 
-   /* paging queue use same doorbell index/routing as gfx 
queue
-* with 0x400 (4096 dwords) offset on second doorbell 
page
+   /* doorbell index of page queue is assigned right after
+* gfx queue on the same instance
 */
-   ring->doorbell_index = 
adev->doorbell_index.sdma_engine[i] << 1;
-   ring->doorbell_index += 0x400;
+   ring->doorbell_index = 
(adev->doorbell_index.sdma_engine[i] + 1) << 1;
ring->vm_hub = AMDGPU_MMHUB0(0);
 
sprintf(ring->name, "page%d", i);
-- 
2.39.2



[PATCH 14/32] drm/amdkfd: Call DQM stop during DQM uninitialize

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

During DQM tear down, call DQM stop to uninitialize HIQ and
associated memory allocated during packet manager init.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 07a04c41e92a..d18aa01a47e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1348,9 +1348,16 @@ static int start_nocpsch(struct device_queue_manager 
*dqm)
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
+   dqm_lock(dqm);
+   if (!dqm->sched_running) {
+   dqm_unlock(dqm);
+   return 0;
+   }
+
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
pm_uninit(&dqm->packet_mgr, false);
dqm->sched_running = false;
+   dqm_unlock(dqm);
 
return 0;
 }
@@ -2423,6 +2430,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
 
 void device_queue_manager_uninit(struct device_queue_manager *dqm)
 {
+   dqm->ops.stop(dqm);
dqm->ops.uninitialize(dqm);
if (!dqm->dev->kfd->shared_resources.enable_mes)
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
-- 
2.39.2



[PATCH 27/32] drm/amdgpu: complement the IH node_id table for multiple AIDs

2023-03-28 Thread Alex Deucher
From: Le Ma 

With different node_id, the SDMA interrupt from multiple AIDs can be
distinguished by sw driver.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c  | 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h  | 6 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 +++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 31c6332190e5..d8471d119618 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -101,12 +101,16 @@ const char *soc15_ih_clientid_name[] = {
 };
 
 const int node_id_to_phys_map[NODEID_MAX] = {
+   [AID0_NODEID] = 0,
[XCD0_NODEID] = 0,
[XCD1_NODEID] = 1,
+   [AID1_NODEID] = 1,
[XCD2_NODEID] = 2,
[XCD3_NODEID] = 3,
+   [AID2_NODEID] = 2,
[XCD4_NODEID] = 4,
[XCD5_NODEID] = 5,
+   [AID3_NODEID] = 3,
[XCD6_NODEID] = 6,
[XCD7_NODEID] = 7,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index a3543f121747..05a649285e9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -101,13 +101,17 @@ struct amdgpu_irq {
uint32_tsrbm_soft_reset;
 };
 
-enum interrupt_node_id_per_xcp {
+enum interrupt_node_id_per_aid {
+   AID0_NODEID = 0,
XCD0_NODEID = 1,
XCD1_NODEID = 2,
+   AID1_NODEID = 4,
XCD2_NODEID = 5,
XCD3_NODEID = 6,
+   AID2_NODEID = 8,
XCD4_NODEID = 9,
XCD5_NODEID = 10,
+   AID3_NODEID = 12,
XCD6_NODEID = 13,
XCD7_NODEID = 14,
NODEID_MAX,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 6935a24d1e89..d3c7f9a43ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1492,6 +1492,9 @@ static int sdma_v4_4_2_process_trap_irq(struct 
amdgpu_device *adev,
 
DRM_DEBUG("IH: SDMA trap\n");
instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance += node_id_to_phys_map[entry->node_id] *
+   adev->sdma.num_inst_per_aid;
+
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
-- 
2.39.2



[PATCH 30/32] drm/amdgpu: program GRBM_MCM_ADDR for non-AID0 GRBM

2023-03-28 Thread Alex Deucher
From: Le Ma 

Otherwise the EOP interrupt on non-AID0 cannot route to IH0.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 3811a7d82af9..bd375e472823 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -189,7 +189,10 @@ static void gfx_v9_4_3_set_kiq_pm4_funcs(struct 
amdgpu_device *adev)
 
 static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
 {
+   int i;
 
+   for (i = 2; i < adev->gfx.num_xcd; i++)
+   WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4);
 }
 
 static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
-- 
2.39.2



[PATCH 26/32] drm/amdgpu: correct the vmhub reference for each XCD in gfxhub init

2023-03-28 Thread Alex Deucher
From: Le Ma 

Correct this even though the value is the same across the different vmhubs.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index d74621662311..d3424ce97aa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -42,10 +42,11 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct 
amdgpu_device *adev,
 uint32_t vmid,
 uint64_t page_table_base)
 {
-   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+   struct amdgpu_vmhub *hub;
int i;
 
for (i = 0; i < adev->gfx.num_xcd; i++) {
+   hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
WREG32_SOC15_OFFSET(GC, i,
regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
@@ -291,7 +292,7 @@ static void gfxhub_v1_2_disable_identity_aperture(struct 
amdgpu_device *adev)
 
 static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev)
 {
-   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+   struct amdgpu_vmhub *hub;
unsigned num_level, block_size;
uint32_t tmp;
int i, j;
@@ -304,6 +305,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct 
amdgpu_device *adev)
block_size -= 9;
 
for (j = 0; j < adev->gfx.num_xcd; j++) {
+   hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, 
i);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 
ENABLE_CONTEXT, 1);
@@ -359,10 +361,11 @@ static void gfxhub_v1_2_setup_vmid_config(struct 
amdgpu_device *adev)
 
 static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev)
 {
-   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+   struct amdgpu_vmhub *hub;
unsigned i, j;
 
for (j = 0; j < adev->gfx.num_xcd; j++) {
+   hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, j, 
regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 
0x);
@@ -408,11 +411,12 @@ static int gfxhub_v1_2_gart_enable(struct amdgpu_device 
*adev)
 
 static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
 {
-   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+   struct amdgpu_vmhub *hub;
u32 tmp;
u32 i, j;
 
for (j = 0; j < adev->gfx.num_xcd; j++) {
+   hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT0_CNTL,
-- 
2.39.2



[PATCH 18/32] drm/amdkfd: Update coherence settings for svm ranges

2023-03-28 Thread Alex Deucher
From: Rajneesh Bhardwaj 

Recently introduced commit "drm/amdgpu: Set cache coherency
for GC 9.4.3" did not update the settings applicable for svm ranges.
Add the coherence settings for svm ranges for GFX IP 9.4.3.

Reviewed-by: Amber Lin 
Signed-off-by: Rajneesh Bhardwaj 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +
 include/uapi/linux/kfd_ioctl.h   |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 9f0932d0c93f..8023be9a9cc1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1156,6 +1156,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, 
struct svm_range *prange,
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+   bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED;
 
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
@@ -1195,6 +1196,22 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, 
struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
+   case IP_VERSION(9, 4, 3):
+   //TODO: Need more work for handling multiple memory partitions
+   //e.g. NPS4. Current approch is only applicable without memory
+   //partitions.
+   snoop = true;
+   if (uncached)
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   /* local HBM region close to partition*/
+   else if (bo_adev == adev)
+   mapping_flags |= AMDGPU_VM_MTYPE_RW;
+   /* local HBM region far from partition or remote XGMI GPU or
+* system memory
+*/
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_NC;
+   break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 2da5c3ad71bd..2a9671e1ddb5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -623,6 +623,8 @@ enum kfd_mmio_remap {
 #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x0020
 /* Keep GPU memory mapping always valid as if XNACK is disable */
 #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x0040
+/* Uncached access to memory */
+#define KFD_IOCTL_SVM_FLAG_UNCACHED 0x0080
 
 /**
  * kfd_ioctl_svm_op - SVM ioctl operations
-- 
2.39.2



[PATCH 24/32] drm/amdgpu: assign the doorbell index for sdma on non-AID0

2023-03-28 Thread Alex Deucher
From: Le Ma 

Allocate new sdma doorbell index for the instances only on AID1 for now.

Todo: there's a limitation that the SDMA doorbell index on SDMA 4.4.2 needs to be
less than 0x1FF, so the tail part in _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT is not
enough to store the sdma doorbell range on a maximum of 4 AIDs if doorbell_range is 20.
So it looks better to create a new doorbell index assignment table for 4.4.2.

v2: change "(x << 1) + 2" to "(x + 1) << 1" for readability.

Signed-off-by: Le Ma 
Acked-by: Felix Kuehling 
Reviewed-by: Lijo Lazar 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 18 --
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 711bdeaa7417..613bc035f2e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -80,6 +80,7 @@ struct amdgpu_doorbell_index {
uint32_t last_non_cp;
uint32_t xcc1_kiq_start;
uint32_t xcc1_mec_ring0_start;
+   uint32_t aid1_sdma_start;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
@@ -166,7 +167,10 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
/* 8 compute rings per GC. Max to 0x1CE */
AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START   = 0x197,
 
-   AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT= 0x1CE,
+   /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+   AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START   = 0x1D0,
+
+   AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT= 0x1F7,
AMDGPU_VEGA20_DOORBELL_INVALID   = 0x
 } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 7deadea03caa..6935a24d1e89 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1310,7 +1310,14 @@ static int sdma_v4_4_2_sw_init(void *handle)
ring->use_doorbell?"true":"false");
 
/* doorbell size is 2 dwords, get DWORD offset */
-   ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+   if (aid_id > 0)
+   ring->doorbell_index =
+   (adev->doorbell_index.aid1_sdma_start << 1)
+   + adev->doorbell_index.sdma_doorbell_range
+   * (i - adev->sdma.num_inst_per_aid);
+   else
+   ring->doorbell_index =
+   adev->doorbell_index.sdma_engine[i] << 1;
ring->vm_hub = AMDGPU_MMHUB0(aid_id);
 
sprintf(ring->name, "sdma%d.%d", aid_id,
@@ -1329,7 +1336,14 @@ static int sdma_v4_4_2_sw_init(void *handle)
/* doorbell index of page queue is assigned right after
 * gfx queue on the same instance
 */
-   ring->doorbell_index = 
(adev->doorbell_index.sdma_engine[i] + 1) << 1;
+   if (aid_id > 0)
+   ring->doorbell_index =
+   ((adev->doorbell_index.aid1_sdma_start 
+ 1) << 1)
+   + 
adev->doorbell_index.sdma_doorbell_range
+   * (i - adev->sdma.num_inst_per_aid);
+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.sdma_engine[i] + 
1) << 1;
ring->vm_hub = AMDGPU_MMHUB0(aid_id);
 
sprintf(ring->name, "page%d.%d", aid_id,
-- 
2.39.2



[PATCH 19/32] drm/amdgpu: Use new atomfirmware init for GC 9.4.3

2023-03-28 Thread Alex Deucher
From: Lijo Lazar 

Use the new atomfirmware initialization logic for GC 9.4.3 based ASICs
also. ASIC init logic doesn't consider boot clocks during init.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aa3c27b6035a..1c657273d9ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -941,7 +941,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device 
*adev)
 {
amdgpu_asic_pre_asic_init(adev);
 
-   if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
+   adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
return amdgpu_atomfirmware_asic_init(adev, true);
else
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
-- 
2.39.2



[PATCH 10/32] drm/amdkfd: Add XCC instance to kgd2kfd interface (v3)

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Gfx 9 starts to have multiple XCC instances in one device. Add an instance
parameter to the kgd2kfd functions where the XCC instance was hard-coded as 0.
Also, update the code to pass the correct instance number when running
on a multi-XCC setup.

v2: introduce the XCC instance to gfx v11 (Morris)
v3: rebase (Alex)

Signed-off-by: Amber Lin 
Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
Tested-by: Amber Lin 
Signed-off-by: Morris Zhang 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  38 ++---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|  22 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  27 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|  19 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  17 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  17 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 160 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  29 ++--
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 103 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |   6 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |   2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |   2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |   2 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  15 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   2 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  25 +--
 17 files changed, 270 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 562e1a04160f..49d8087e469e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -33,7 +33,7 @@
 #include "soc15.h"
 
 static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
-   u32 pasid, unsigned int vmid)
+   u32 pasid, unsigned int vmid, uint32_t inst)
 {
unsigned long timeout;
 
@@ -47,11 +47,11 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct 
amdgpu_device *adev,
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID0_PASID_MAPPING__VALID_MASK;
 
-   WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+   WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
 
timeout = jiffies + msecs_to_jiffies(10);
-   while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+   while (!(RREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
(1U << vmid))) {
if (time_after(jiffies, timeout)) {
@@ -61,13 +61,13 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct 
amdgpu_device *adev,
cpu_relax();
}
 
-   WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+   WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
1U << vmid);
 
-   WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+   WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT) + vmid,
pasid_mapping);
-   WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+   WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT_MM) + vmid,
pasid_mapping);
 
return 0;
@@ -81,7 +81,7 @@ static inline struct v9_mqd *get_mqd(void *mqd)
 static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
-   uint32_t wptr_mask, struct mm_struct *mm)
+   uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
 {
struct v9_mqd *m;
uint32_t *mqd_hqd;
@@ -89,12 +89,12 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
 
m = get_mqd(mqd);
 
-   kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id);
+   kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
 
/* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
mqd_hqd = &m->cp_mqd_base_addr_lo;
-   hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
-   hqd_end = SOC15_REG_OFFSET(GC, 0, regCP_HQD_AQL_DISPATCH_ID_HI);
+   hqd_base = SOC15_REG_OFFSET(GC, inst, regCP_MQD_BASE_ADDR);
+   hqd_end = SOC15_REG_OFFSET(GC, inst, regCP_HQD_AQL_DISPATCH_ID_HI);
 
for (reg = hqd_base; reg <= hqd_end; reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
@@ -103,7 +103,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device 
*adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 CP_HQD_PQ_DOORBELL_CONTROL, DOORBEL

[PATCH 16/32] drm/amdkfd: Update SDMA queue management for GFX9.4.3

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

This patch updates SDMA queue management for multi XCC in GFX9.4.3.
- Allocate/deallocate SDMA queues from the correct SDMA engines
  based on the partition mode.
- Updates the kgd2kfd interface to fetch the correct SDMA register
  addresses.
- It also fixes dumping correct SDMA queue info in debugfs.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 194 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   8 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  59 +++---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 5 files changed, 227 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 49d8087e469e..e81bdca53f42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -31,6 +31,192 @@
 #include "oss/osssys_4_0_sh_mask.h"
 #include "v9_structs.h"
 #include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+   return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+   unsigned int engine_id,
+   unsigned int queue_id)
+{
+   uint32_t sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, engine_id,
+   regSDMA_RLC0_RB_CNTL) -
+   regSDMA_RLC0_RB_CNTL;
+   uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+   pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+   queue_id, retval);
+   return retval;
+}
+
+int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+uint32_t __user *wptr, struct mm_struct *mm)
+{
+   struct v9_sdma_mqd *m;
+   uint32_t sdma_rlc_reg_offset;
+   unsigned long end_jiffies;
+   uint32_t data;
+   uint64_t data64;
+   uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+   m = get_sdma_mqd(mqd);
+   sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+   m->sdma_queue_id);
+
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+   m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+   end_jiffies = msecs_to_jiffies(2000) + jiffies;
+   while (true) {
+   data = RREG32(sdma_rlc_reg_offset + 
regSDMA_RLC0_CONTEXT_STATUS);
+   if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+   break;
+   if (time_after(jiffies, end_jiffies)) {
+   pr_err("SDMA RLC not idle in %s\n", __func__);
+   return -ETIME;
+   }
+   usleep_range(500, 1000);
+   }
+
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+   m->sdmax_rlcx_doorbell_offset);
+
+   data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+   ENABLE, 1);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+   m->sdmax_rlcx_rb_rptr);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+   m->sdmax_rlcx_rb_rptr_hi);
+
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+   if (read_user_wptr(mm, wptr64, data64)) {
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+   lower_32_bits(data64));
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+   upper_32_bits(data64));
+   } else {
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+   m->sdmax_rlcx_rb_rptr);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+   m->sdmax_rlcx_rb_rptr_hi);
+   }
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, 
m->sdmax_rlcx_rb_base);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+   m->sdmax_rlcx_rb_base_hi);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+   m->sdmax_rlcx_rb_rptr_addr_lo);
+   WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+   m->sdmax_rlcx_rb_rptr_addr_hi);
+
+   data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+  

[PATCH 15/32] drm/amdkfd: Update sysfs node properties for multi XCC

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Update simd_count and array_count node properties to report
values multiplied by number of XCCs in the partition.

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 94af37df3ed2..6d958bf0fe90 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -468,7 +468,8 @@ static ssize_t node_show(struct kobject *kobj, struct 
attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
  dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
- dev->gpu ? dev->node_props.simd_count : 0);
+ dev->gpu ? (dev->node_props.simd_count *
+ dev->gpu->num_xcc_per_node) : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
  dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -492,7 +493,8 @@ static ssize_t node_show(struct kobject *kobj, struct 
attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
  dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, offs, "array_count",
- dev->node_props.array_count);
+ dev->gpu ? (dev->node_props.array_count *
+ dev->gpu->num_xcc_per_node) : 0);
sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
  dev->node_props.simd_arrays_per_engine);
sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
-- 
2.39.2



[PATCH 13/32] drm/amdgpu: Fix VM fault reporting on XCC1

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Fix VM fault reporting and clear VM fault register
for XCC1.

Signed-off-by: Mukul Joshi 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 18aa2a896c3d..cb8ea1f5bc44 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -555,6 +555,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
const char *mmhub_cid;
const char *hub_name;
u64 addr;
+   uint32_t node_id;
 
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
@@ -594,7 +595,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device 
*adev,
hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
} else {
hub_name = "gfxhub0";
-   hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+   node_id = (adev->ip_versions[GC_HWIP][0] ==
+  IP_VERSION(9, 4, 3)) ? entry->node_id : 0;
+   hub = &adev->vmhub[node_id/2];
}
 
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
@@ -628,11 +631,10 @@ static int gmc_v9_0_process_interrupt(struct 
amdgpu_device *adev,
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 
-
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
-   if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) {
+   if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
gfxhub_client_ids[cid],
-- 
2.39.2



[PATCH 08/32] drm/amdkfd: Update MQD management on multi XCC setup

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Update MQD management for both HIQ and user-mode compute
queues on a multi XCC setup. MQDs needs to be allocated,
initialized, loaded and destroyed for each XCC in the KFD
node.

Signed-off-by: Mukul Joshi 
Signed-off-by: Amber Lin 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  51 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |  28 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |   8 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |   3 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |   3 +
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 292 --
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |   3 +
 .../amd/amdkfd/kfd_process_queue_manager.c|  16 +-
 drivers/gpu/drm/amd/include/v9_structs.h  |  30 +-
 10 files changed, 380 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce34b73d05bc..a26e9009d4e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -804,6 +804,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device 
*bdev,
sg_free_table(ttm->sg);
 }
 
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+   struct ttm_tt *ttm, uint64_t flags)
+{
+   struct amdgpu_ttm_tt *gtt = (void *)ttm;
+   uint64_t total_pages = ttm->num_pages;
+   int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+   uint64_t page_idx, pages_per_xcc = total_pages / num_xcc;
+   int i;
+   uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+   AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+
+   for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+   /* MQD page: use default flags */
+   amdgpu_gart_bind(adev,
+   gtt->offset + (page_idx << PAGE_SHIFT),
+   1, >t->ttm.dma_address[page_idx], flags);
+   /*
+* Ctrl pages - modify the memory type to NC (ctrl_flags) from
+* the second page of the BO onward.
+*/
+   amdgpu_gart_bind(adev,
+   gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+   pages_per_xcc - 1,
+   >t->ttm.dma_address[page_idx + 1],
+   ctrl_flags);
+   }
+}
+
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 struct ttm_buffer_object *tbo,
 uint64_t flags)
@@ -816,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device 
*adev,
flags |= AMDGPU_PTE_TMZ;
 
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
-   uint64_t page_idx = 1;
-
-   amdgpu_gart_bind(adev, gtt->offset, page_idx,
-gtt->ttm.dma_address, flags);
-
-   /* The memory type of the first page defaults to UC. Now
-* modify the memory type to NC from the second page of
-* the BO onward.
-*/
-   flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-   flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
-
-   amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
-ttm->num_pages - page_idx,
-&(gtt->ttm.dma_address[page_idx]), flags);
+   amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
} else {
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 gtt->ttm.dma_address, flags);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 6ee17100c333..0f0a7d73fad3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2247,7 +2247,8 @@ static int allocate_hiq_sdma_mqd(struct 
device_queue_manager *dqm)
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
dev->kfd->device_info.num_sdma_queues_per_engine +
-   dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+   (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size +
+   dqm->dev->num_xcc_per_node);
 
retval = amdgpu_amdkfd_alloc_gtt_mem(d

[PATCH 11/32] drm/amdgpu: Add XCC inst to PASID TLB flushing

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Add XCC instance to select the correct KIQ ring when
flushing TLBs on a multi-XCC setup.

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h|  7 ---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 12 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  7 +--
 9 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 90d872356edc..0f641bb30870 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -742,7 +742,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, enum TLB_FLUSH_TYPE 
flush_type)
+ uint16_t pasid,
+ enum TLB_FLUSH_TYPE flush_type,
+ uint32_t inst)
 {
bool all_hub = false;
 
@@ -750,7 +752,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device 
*adev,
adev->family == AMDGPU_FAMILY_RV)
all_hub = true;
 
-   return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+   return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, 
inst);
 }
 
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..df07e212c21e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -160,7 +160,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct 
amdgpu_device *adev);
 int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
uint16_t vmid);
 int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-   uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+   uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
+   uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 232523e3e270..5c1d68bec0be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -118,7 +118,8 @@ struct amdgpu_gmc_funcs {
uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */
int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
-   uint32_t flush_type, bool all_hub);
+   uint32_t flush_type, bool all_hub,
+   uint32_t inst);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
   uint64_t pd_addr);
@@ -295,9 +296,9 @@ struct amdgpu_gmc {
 };
 
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) 
((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
-   ((adev), (pasid), (type), (allhub)))
+   ((adev), (pasid), (type), (allhub), (inst)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
(r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
(r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) 
(adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 503b4ff13fa4..100da79e17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -419,7 +419,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  */
 static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type,
-   bool all_hub)
+   bool all_hub, uint32_t inst)
 {
int vmid, i;
signed long r;
diff --git a/drivers/gp

[PATCH 17/32] drm/amdgpu: Fix CP_HYP_XCP_CTL register programming in CPX mode

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Currently, in CPX mode, the CP_HYP_XCP_CTL register is programmed
incorrectly with the number of XCCs in the partition. As a result,
HIQ doesn't work in CPX mode. Fix this by programming the correct
number of XCCs in a partition, which is 1, in CPX mode.

Signed-off-by: Mukul Joshi 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 7b589f279ece..3811a7d82af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1166,7 +1166,7 @@ static void gfx_v9_4_3_program_xcc_id(struct 
amdgpu_device *adev, int xcc_id)
break;
case 2:
tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << 
REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID);
-   tmp = tmp | (adev->gfx.num_xcd << 
REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP));
+   tmp = tmp | (adev->gfx.num_xcc_per_xcp << 
REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP));
WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, tmp);
 
tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, 
PHYSICAL_XCC_ID);
-- 
2.39.2



[PATCH 09/32] drm/amdkfd: Add PM4 target XCC

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

In a device that supports multiple XCCs, unlike AQL queues, a PM4 queue
will only be processed on one XCC in the partition. This patch
re-purposes the queue percentage variable in the create queue and update
queue ioctls to let user space specify the target XCC.

Signed-off-by: Amber Lin 
Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 22 +++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  1 +
 .../amd/amdkfd/kfd_process_queue_manager.c|  1 +
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8949dcd24f79..cf1a97583901 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -186,7 +186,12 @@ static int kfd_ioctl_get_version(struct file *filep, 
struct kfd_process *p,
 static int set_queue_properties_from_user(struct queue_properties 
*q_properties,
struct kfd_ioctl_create_queue_args *args)
 {
-   if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+   /*
+* Repurpose queue percentage to accommodate new features:
+* bit 0-7: queue percentage
+* bit 8-15: pm4_target_xcc
+*/
+   if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("Queue percentage must be between 0 to 
KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
@@ -236,7 +241,9 @@ static int set_queue_properties_from_user(struct 
queue_properties *q_properties,
 
q_properties->is_interop = false;
q_properties->is_gws = false;
-   q_properties->queue_percent = args->queue_percentage;
+   q_properties->queue_percent = args->queue_percentage & 0xFF;
+   /* bit 8-15 are repurposed to be PM4 target XCC */
+   q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
q_properties->priority = args->queue_priority;
q_properties->queue_address = args->ring_base_address;
q_properties->queue_size = args->ring_size;
@@ -442,7 +449,12 @@ static int kfd_ioctl_update_queue(struct file *filp, 
struct kfd_process *p,
struct kfd_ioctl_update_queue_args *args = data;
struct queue_properties properties;
 
-   if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+   /*
+* Repurpose queue percentage to accommodate new features:
+* bit 0-7: queue percentage
+* bit 8-15: pm4_target_xcc
+*/
+   if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("Queue percentage must be between 0 to 
KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
@@ -466,7 +478,9 @@ static int kfd_ioctl_update_queue(struct file *filp, struct 
kfd_process *p,
 
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
-   properties.queue_percent = args->queue_percentage;
+   properties.queue_percent = args->queue_percentage & 0xFF;
+   /* bit 8-15 are repurposed to be PM4 target XCC */
+   properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
 
pr_debug("Updating queue id %d for pasid 0x%x\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index fa89606099b8..6bfa9b9d75bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -586,6 +586,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void 
**mqd,
/* PM4 Queue */
m->compute_current_logic_xcc_id = 0;
m->compute_tg_chunk_size = 0;
+   m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
}
 
if (xcc == 0) {
@@ -626,6 +627,7 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void 
*mqd,
/* PM4 Queue */
m->compute_current_logic_xcc_id = 0;
m->compute_tg_chunk_size = 0;
+   m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
}
}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 873b49238dc1..1337fcdf8958 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -509,6 +509,7 @@ struct queue_properties {
bool is_evicted;
bool is_active;
bool is_gws;
+   uint32_t pm4_target_xcc;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
/* Relevant only for sdma queues*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_qu

[PATCH 06/32] drm/amdkfd: Introduce kfd_node struct (v5)

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

Introduce a new structure, kfd_node, which will now represent
a compute node. kfd_node is carved out of the kfd_dev structure.
The kfd_dev struct will now become the parent of kfd_node, and will
store common resources such as doorbells, the GTT sub-allocator, etc.
The kfd_node struct will store all resources specific to a compute
node, such as the device queue manager, interrupt handling, etc.

This is the first step in adding compute partition support in KFD.

v2: introduce kfd_node struct to gc v11 (Hawking)
v3: make reference to kfd_dev struct through kfd_node (Morris)
v4: use kfd_node instead for kfd isr/mqd functions (Morris)
v5: rebase (Alex)

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Hawking Zhang 
Signed-off-by: Morris Zhang 
Signed-off-by: Alex Deucher 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   1 +
 .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  43 +--
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c |  28 +-
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 258 +++---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 100 +++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   4 +-
 .../amd/amdkfd/kfd_device_queue_manager_v9.c  |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c |  14 +-
 drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  12 +-
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |  12 +-
 .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c  |   6 +-
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c|  64 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  22 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |  18 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |   8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  |   8 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |   6 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |   8 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  18 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  26 +-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  10 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |   2 +-
 .../drm/amd/amdkfd/kfd_packet_manager_v9.c|   8 +-
 .../drm/amd/amdkfd/kfd_packet_manager_vi.c|   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 168 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  54 ++--
 .../amd/amdkfd/kfd_process_queue_manager.c|  20 +-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  40 +--
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h   |   8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  56 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   8 +-
 38 files changed, 574 insertions(+), 496 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e7403f8e4eba..f859e1d9af3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -34,6 +34,7 @@
 #include "amdgpu_dma_buf.h"
 #include 
 #include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
 #include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c 
b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 5c8023cba196..4ebfff6b6c55 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -26,7 +26,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
-static bool cik_event_interrupt_isr(struct kfd_dev *dev,
+static bool cik_event_interrupt_isr(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
@@ -85,7 +85,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
!amdgpu_no_queue_eviction_on_vm_fault);
 }
 
-static void cik_event_interrupt_wq(struct kfd_dev *dev,
+static void cik_event_interrupt_wq(struct kfd_node *dev,
const uint32_t *ih_ring_entry)
 {
const struct cik_ih_ring_entry *ihre =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7228a3db63a2..8949dcd24f79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -293,7 +293,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
void *data)
 {
struct kfd_ioctl_create_queue_args *args = data;
-   struc

[PATCH 07/32] drm/amdkfd: Add spatial partitioning support in KFD

2023-03-28 Thread Alex Deucher
From: Mukul Joshi 

This patch introduces multi-partition support in KFD.
This patch includes:
- Support for maximum 8 spatial partitions in KFD.
- Initialize one HIQ per partition.
- Management of VMID range depending on partition mode.
- Management of doorbell aperture space between all
  partitions.
- Each partition does its own queue management, interrupt
  handling, SMI event reporting.
- IOMMU, if enabled with multiple partitions, will only work
  on the first partition.
- SPM is only supported on the first partition.
- Currently, there is no support for resetting individual
  partitions. All partitions will reset together.

Signed-off-by: Mukul Joshi 
Tested-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c   | 214 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|  13 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |   8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c   |  24 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |   8 +-
 7 files changed, 208 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index b0231ee08ea3..363114edb3c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -559,23 +559,27 @@ static int kfd_init_node(struct kfd_node *node)
return err;
 }
 
-static void kfd_cleanup_node(struct kfd_dev *kfd)
+static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
 {
-   struct kfd_node *knode = kfd->node;
-
-   device_queue_manager_uninit(knode->dqm);
-   kfd_interrupt_exit(knode);
-   kfd_topology_remove_device(knode);
-   if (knode->gws)
-   amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
-   kfree(knode);
-   kfd->node = NULL;
+   struct kfd_node *knode;
+   unsigned int i;
+
+   for (i = 0; i < num_nodes; i++) {
+   knode = kfd->nodes[i];
+   device_queue_manager_uninit(knode->dqm);
+   kfd_interrupt_exit(knode);
+   kfd_topology_remove_device(knode);
+   if (knode->gws)
+   amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+   kfree(knode);
+   kfd->nodes[i] = NULL;
+   }
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 const struct kgd2kfd_shared_resources *gpu_resources)
 {
-   unsigned int size, map_process_packet_size;
+   unsigned int size, map_process_packet_size, i;
struct kfd_node *node;
uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
unsigned int max_proc_per_quantum;
@@ -588,9 +592,18 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
 
-   first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
-   last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
-   vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+   if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 ||
+   kfd->adev->gfx.num_xcc_per_xcp == 0)
+   kfd->num_nodes = 1;
+   else
+   kfd->num_nodes =
+   kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp;
+   if (kfd->num_nodes == 0) {
+   dev_err(kfd_device,
+   "KFD num nodes cannot be 0, GC inst: %d, 
num_xcc_in_node: %d\n",
+   kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp);
+   goto out;
+   }
 
/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
 * 32 and 64-bit requests are possible and must be
@@ -609,6 +622,26 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
return false;
}
 
+   first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+   last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+   vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+
+   /* For GFX9.4.3, we need special handling for VMIDs depending on
+* partition mode.
+* In CPX mode, the VMID range needs to be shared between XCDs.
+* Additionally, there are 13 VMIDs (3-15) available for KFD. To
+* divide them equally, we change starting VMID to 4 and not use
+* VMID 3.
+* If the VMID range changes for GFX9.4.3, then this code MUST be
+* revisited.
+*/
+   if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+   kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+   kfd->num_nodes != 1) {
+   vmid_num_kfd /= 2;
+   first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2;
+   }
+
/* Verify module parameters regarding mapped process number*/
if (hws_max_conc_proc >= 0)

[PATCH 03/32] drm/amdgpu: add node_id to physical id conversion in EOP handler

2023-03-28 Thread Alex Deucher
From: Le Ma 

A new field nodeid in interrupt cookie indicates the node ID.

Signed-off-by: Le Ma 
Reviewed-by: Shiwu Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 14 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c |  6 --
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index a6aef488a822..31c6332190e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -100,6 +100,17 @@ const char *soc15_ih_clientid_name[] = {
"MP1"
 };
 
+const int node_id_to_phys_map[NODEID_MAX] = {
+   [XCD0_NODEID] = 0,
+   [XCD1_NODEID] = 1,
+   [XCD2_NODEID] = 2,
+   [XCD3_NODEID] = 3,
+   [XCD4_NODEID] = 4,
+   [XCD5_NODEID] = 5,
+   [XCD6_NODEID] = 6,
+   [XCD7_NODEID] = 7,
+};
+
 /**
  * amdgpu_irq_disable_all - disable *all* interrupts
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 7f78340f3572..a3543f121747 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -101,6 +101,20 @@ struct amdgpu_irq {
uint32_tsrbm_soft_reset;
 };
 
+enum interrupt_node_id_per_xcp {
+   XCD0_NODEID = 1,
+   XCD1_NODEID = 2,
+   XCD2_NODEID = 5,
+   XCD3_NODEID = 6,
+   XCD4_NODEID = 9,
+   XCD5_NODEID = 10,
+   XCD6_NODEID = 13,
+   XCD7_NODEID = 14,
+   NODEID_MAX,
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
 void amdgpu_irq_disable_all(struct amdgpu_device *adev);
 
 int amdgpu_irq_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 5cc4c2c31b3a..3682b0a64200 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2805,7 +2805,7 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
 {
-   int i;
+   int i, phys_id;
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
 
@@ -2814,12 +2814,14 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device 
*adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
 
+   phys_id = node_id_to_phys_map[entry->node_id];
+
switch (me_id) {
case 0:
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   ring = &adev->gfx.compute_ring[i];
+   ring = &adev->gfx.compute_ring[i + phys_id * 
adev->gfx.num_compute_rings];
/* Per-queue interrupt is supported for MEC starting 
from VI.
  * The interrupt can only be enabled/disabled per pipe 
instead of per queue.
  */
-- 
2.39.2



[PATCH 05/32] drm/amdgpu: Add mode2 reset logic for v13.0.6

2023-03-28 Thread Alex Deucher
From: Lijo Lazar 

Mode2 reset for v13.0.6 has similar workflow as v13.0.2

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 6437ead87e5f..eec41ad30406 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -40,6 +40,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
 
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+   case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_init(adev);
break;
case IP_VERSION(11, 0, 7):
@@ -61,6 +62,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
 
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+   case IP_VERSION(13, 0, 6):
ret = aldebaran_reset_fini(adev);
break;
case IP_VERSION(11, 0, 7):
-- 
2.39.2



[PATCH 04/32] drm/amdgpu: Add some XCC programming

2023-03-28 Thread Alex Deucher
From: Lijo Lazar 

Add additional XCC programming sequences.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 26 +
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 3682b0a64200..7b589f279ece 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1155,6 +1155,29 @@ void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device 
*adev, int xcc_id)
WREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG, data);
 }
 
+static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id)
+{
+   uint32_t tmp = 0;
+
+   switch (adev->gfx.num_xcd) {
+   /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */
+   case 1:
+   WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8);
+   break;
+   case 2:
+   tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << 
REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID);
+   tmp = tmp | (adev->gfx.num_xcd << 
REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP));
+   WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, tmp);
+
+   tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, 
PHYSICAL_XCC_ID);
+   tmp = tmp | (xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, 
XCC_DIE_ID));
+   WREG32_SOC15(GC, xcc_id, regCP_PSP_XCP_CTL, tmp);
+   break;
+   default:
+   break;
+   }
+}
+
 static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
 {
uint32_t rlc_setting;
@@ -1954,6 +1977,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device 
*adev)
return r;
}
 
+   /* set the virtual and physical id based on partition_mode */
+   gfx_v9_4_3_program_xcc_id(adev, i);
+
r = gfx_v9_4_3_kiq_resume(adev, i);
if (r)
return r;
-- 
2.39.2



[PATCH 01/32] drm/amdgpu: support gc v9_4_3 ring_test running on all xcc

2023-03-28 Thread Alex Deucher
From: Hawking Zhang 

Each xcc has its own scratch_reg offset

Signed-off-by: Hawking Zhang 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 204b069c587d..351bc16b95ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -232,20 +232,23 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring 
*ring)
uint32_t tmp = 0;
unsigned i;
int r;
+   /* scratch_reg0_offset is 32bit even with full XCD config */
+   uint32_t scratch_reg0_offset;
+
+   scratch_reg0_offset = SOC15_REG_OFFSET(GC, ring->xcc_id, 
regSCRATCH_REG0);
+   WREG32(scratch_reg0_offset, 0xCAFEDEAD);
 
-   WREG32_SOC15(GC, 0, regSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
 
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-   amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0) -
- PACKET3_SET_UCONFIG_REG_START);
+   amdgpu_ring_write(ring, scratch_reg0_offset - 
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
 
for (i = 0; i < adev->usec_timeout; i++) {
-   tmp = RREG32_SOC15(GC, 0, regSCRATCH_REG0);
+   tmp = RREG32(scratch_reg0_offset);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
-- 
2.39.2



[PATCH 02/32] drm/amdgpu: enable the ring and IB test for slave kcq

2023-03-28 Thread Alex Deucher
From: Shiwu Zhang 

With the mec FW update to utilize the mqd base set by
driver for kcq mapping, slave kcq ring test and IB test
can be re-enabled.

Signed-off-by: Shiwu Zhang 
Reviewed-by: Le Ma 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 61 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  5 --
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 12 ++---
 3 files changed, 33 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 14ea9bbc3715..5ef6fbe354c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -442,7 +442,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 
/* prepare MQD backup */
adev->gfx.mec.mqd_backup[i + xcc_id * 
adev->gfx.num_compute_rings] = kmalloc(mqd_size, GFP_KERNEL);
-   if (!adev->gfx.mec.mqd_backup[i])
+   if (!adev->gfx.mec.mqd_backup[i + xcc_id * 
adev->gfx.num_compute_rings])
dev_warn(adev->dev, "no memory to create MQD 
backup for ring %s\n", ring->name);
}
}
@@ -468,8 +468,8 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int 
xcc_id)
 
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
j = i + xcc_id * adev->gfx.num_compute_rings;
-   ring = &adev->gfx.compute_ring[i];
-   kfree(adev->gfx.mec.mqd_backup[i]);
+   ring = &adev->gfx.compute_ring[j];
+   kfree(adev->gfx.mec.mqd_backup[j]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
  &ring->mqd_gpu_addr,
  &ring->mqd_ptr);
@@ -494,22 +494,20 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, 
int xcc_id)
return -EINVAL;
 
spin_lock(&kiq->ring_lock);
-   if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-   if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-   adev->gfx.num_compute_rings)) {
-   spin_unlock(&kiq->ring_lock);
-   return -ENOMEM;
-   }
+   if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+   adev->gfx.num_compute_rings)) {
+   spin_unlock(&kiq->ring_lock);
+   return -ENOMEM;
+   }
 
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   j = i + xcc_id * adev->gfx.num_compute_rings;
-   kiq->pmf->kiq_unmap_queues(kiq_ring,
-  &adev->gfx.compute_ring[i],
-  RESET_QUEUES, 0, 0);
-   }
+   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+   j = i + xcc_id * adev->gfx.num_compute_rings;
+   kiq->pmf->kiq_unmap_queues(kiq_ring,
+  &adev->gfx.compute_ring[i],
+  RESET_QUEUES, 0, 0);
}
 
-   if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+   if (kiq_ring->sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
 
@@ -557,26 +555,23 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int 
xcc_id)
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
spin_lock(&kiq->ring_lock);
-   /* No need to map kcq on the slave */
-   if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-   r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
-   adev->gfx.num_compute_rings +
-   kiq->pmf->set_resources_size);
-   if (r) {
-   DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-   spin_unlock(&adev->gfx.kiq[0].ring_lock);
-   return r;
-   }
+   r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+   adev->gfx.num_compute_rings +
+   kiq->pmf->set_resources_size);
+   if (r) {
+   DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+   spin_unlock(&kiq->ring_lock);
+   return r;
+   }
 
-   if (adev->enable_mes)
-   queue_mask = ~0ULL;
+   if (adev->enable_mes)
+   queue_mask = ~0ULL;
 
-   kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
-   for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-   j = i + xcc_id * adev->gfx.num_compute_rings;
+   

[PATCH 2/4] drm/amdgpu/vcn: enable vcn doorbell for vcn4.0.3

2023-03-28 Thread Alex Deucher
From: James Zhu 

Enable vcn doorbell for vcn4.0.3.

Signed-off-by: James Zhu 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 98bff162f453..4418c9f05ec4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -109,7 +109,8 @@ static int vcn_v4_0_3_sw_init(void *handle)
return r;
 
ring = &adev->vcn.inst->ring_dec;
-   ring->use_doorbell = false;
+   ring->use_doorbell = true;
+   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 5;
ring->vm_hub = AMDGPU_MMHUB0(0);
sprintf(ring->name, "vcn_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
@@ -174,6 +175,13 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int r;
 
+   adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+   ring->doorbell_index, ring->me);
+   if (ring->use_doorbell)
+   WREG32_SOC15(VCN, ring->me, regVCN_RB4_DB_CTRL,
+   ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB4_DB_CTRL__EN_MASK);
+
r = amdgpu_ring_test_helper(ring);
 
if (!r)
-- 
2.39.2



[PATCH 4/4] drm/amdgpu: fix vcn doorbell range setting

2023-03-28 Thread Alex Deucher
From: James Zhu 

Should use vcn_ring0_1 instead of doorbell index to
set nbio doorbell range.

Signed-off-by: James Zhu 
Reviewed-by: Sonny Jiang 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 4418c9f05ec4..ae2cc47d344a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -176,7 +176,7 @@ static int vcn_v4_0_3_hw_init(void *handle)
int r;
 
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-   ring->doorbell_index, ring->me);
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 
ring->me);
if (ring->use_doorbell)
WREG32_SOC15(VCN, ring->me, regVCN_RB4_DB_CTRL,
ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
-- 
2.39.2



[PATCH 1/4] drm/amdgpu/nbio: update vcn doorbell range

2023-03-28 Thread Alex Deucher
From: James Zhu 

VCN4.0.3 used up to 16 doorbells per partition.

Signed-off-by: James Zhu 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 5f2270b1d7de..27f456f32f82 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -161,7 +161,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct 
amdgpu_device *adev, bool use_do
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
-   0x8);
+   0x10);
 
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
@@ -174,7 +174,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct 
amdgpu_device *adev, bool use_do
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
-   S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
+   S2A_DOORBELL_PORT1_RANGE_SIZE, 0x10);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
-- 
2.39.2



[PATCH 3/4] drm/amdgpu/jpeg: enable jpeg doorbell for jpeg4.0.3

2023-03-28 Thread Alex Deucher
From: James Zhu 

Enable jpeg doorbell for jpeg4.0.3.

Signed-off-by: James Zhu 
Reviewed-by: Leo Liu 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 0d3509409d3a..8914f3c6c80f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -100,8 +100,9 @@ static int jpeg_v4_0_3_sw_init(void *handle)
 
for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) {
ring = &adev->jpeg.inst->ring_dec[i];
-   ring->use_doorbell = false;
+   ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(0);
+   ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 
1) + (i?8:1) + i;
sprintf(ring->name, "jpeg_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -148,11 +149,19 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
 static int jpeg_v4_0_3_hw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-   struct amdgpu_ring *ring;
+   struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int i, r;
 
+   adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) {
ring = &adev->jpeg.inst->ring_dec[i];
+   if (ring->use_doorbell)
+   WREG32_SOC15_OFFSET(VCN, 0, regVCN_JPEG_DB_CTRL,
+   (ring->pipe?(ring->pipe - 0x15):0),
+   ring->doorbell_index << 
VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+   VCN_JPEG_DB_CTRL__EN_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-- 
2.39.2



[PATCH] drm/amdkfd: Set F8_MODE for gc_v9_4_3

2023-03-28 Thread Alex Deucher
From: Amber Lin 

Set F8_MODE for GC 9.4.3 as optimal/non-IEEE. Also update gc_v9_0
to gc_v9_4_3 to include more definitions such as the F8_MODE bit, and
remove unused header files.

v2: fix IP version check (Alex)

Signed-off-by: Amber Lin 
Reviewed-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 8b2dd2670ab7..914d94679d73 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -24,9 +24,7 @@
 
 #include "kfd_device_queue_manager.h"
 #include "vega10_enum.h"
-#include "gc/gc_9_0_offset.h"
-#include "gc/gc_9_0_sh_mask.h"
-#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "gc/gc_9_4_3_sh_mask.h"
 
 static int update_qpd_v9(struct device_queue_manager *dqm,
 struct qcm_process_device *qpd);
@@ -65,6 +63,10 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
if (dqm->dev->noretry && !dqm->dev->use_iommu_v2)
qpd->sh_mem_config |= 1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
 
+   if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
+   qpd->sh_mem_config |=
+   (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
-- 
2.39.2



Re: [Resend PATCH v1 0/3] send message to pmfw when SMT changes

2023-03-28 Thread Guilherme G. Piccoli
On 28/03/2023 03:07, Yang, WenYou wrote:
> [AMD Official Use Only - General]
> [...]
>> Hi Wenyou, thank you for the clarification and for the interesting patch set!
>>
>> So, just so I can understand: is it expected that gamers disable SMT? I heard
>> some games got their performance improved, but not sure the reason...if you
>> have thoughts on that, I'm pretty interested!
> Hi Guilherme,
> 
> No, it is not. It is not intended to disable SMT.
> 
> Yes, there is a commit to get performance improved.
> https://github.com/torvalds/linux/commit/a8fb40966f19ff81520d9ccf8f7e2b95201368b8
> 
> Best Regards,
> Wenyou

Thanks, this one is present in Deck's kernel for a while.
Cheers,


Guilherme


Re: [PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some asics

2023-03-28 Thread Luben Tuikov
Looks good--thanks!

Acked-by: Luben Tuikov 

Regards,
Luben

On 2023-03-28 07:41, Tong Liu01 wrote:
> Add sysfs node pp_dpm_vclk1 for gc11.0.3
> 
> Signed-off-by: Tong Liu01 
> ---
>  .../gpu/drm/amd/include/kgd_pp_interface.h|  1 +
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c| 22 +++
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 86b6b0c9fb02..fe75497eeeab 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -104,6 +104,7 @@ enum pp_clock_type {
>   PP_FCLK,
>   PP_DCEFCLK,
>   PP_VCLK,
> + PP_VCLK1,
>   PP_DCLK,
>   OD_SCLK,
>   OD_MCLK,
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index d75a67cfe523..1da6e9469450 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -1180,6 +1180,21 @@ static ssize_t amdgpu_set_pp_dpm_vclk(struct device 
> *dev,
>   return amdgpu_set_pp_dpm_clock(dev, PP_VCLK, buf, count);
>  }
>  
> +static ssize_t amdgpu_get_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + return amdgpu_get_pp_dpm_clock(dev, PP_VCLK1, buf);
> +}
> +
> +static ssize_t amdgpu_set_pp_dpm_vclk1(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf,
> + size_t count)
> +{
> + return amdgpu_set_pp_dpm_clock(dev, PP_VCLK1, buf, count);
> +}
> +
>  static ssize_t amdgpu_get_pp_dpm_dclk(struct device *dev,
>   struct device_attribute *attr,
>   char *buf)
> @@ -2002,6 +2017,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] 
> = {
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk,  
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk,  
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> + AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk1, 
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dclk,  
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk,   
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
>   AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie,  
> ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
> @@ -2091,6 +2107,12 @@ static int default_attr_update(struct amdgpu_device 
> *adev, struct amdgpu_device_
> gc_ver == IP_VERSION(11, 0, 2) ||
> gc_ver == IP_VERSION(11, 0, 3)))
>   *states = ATTR_STATE_UNSUPPORTED;
> + } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
> + if (!((gc_ver == IP_VERSION(10, 3, 1) ||
> +gc_ver == IP_VERSION(10, 3, 0) ||
> +gc_ver == IP_VERSION(11, 0, 2) ||
> +gc_ver == IP_VERSION(11, 0, 3)) && 
> adev->vcn.num_vcn_inst >= 2))
> + *states = ATTR_STATE_UNSUPPORTED;
>   } else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
>   if (!(gc_ver == IP_VERSION(10, 3, 1) ||
> gc_ver == IP_VERSION(10, 3, 0) ||
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index b5d64749990e..bffbef3f666d 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2006,6 +2006,8 @@ static int smu_force_ppclk_levels(void *handle,
>   clk_type = SMU_DCEFCLK; break;
>   case PP_VCLK:
>   clk_type = SMU_VCLK; break;
> + case PP_VCLK1:
> + clk_type = SMU_VCLK1; break;
>   case PP_DCLK:
>   clk_type = SMU_DCLK; break;
>   case OD_SCLK:
> @@ -2393,6 +2395,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum 
> pp_clock_type type)
>   clk_type = SMU_DCEFCLK; break;
>   case PP_VCLK:
>   clk_type = SMU_VCLK; break;
> + case PP_VCLK1:
> + clk_type = SMU_VCLK1; break;
>   case PP_DCLK:
>   clk_type = SMU_DCLK; break;
>   case OD_SCLK:



[PATCH] drm/amdgpu: enable sysfs node pp_dpm_vclk1 for some asics

2023-03-28 Thread Tong Liu01
Add sysfs node pp_dpm_vclk1 for gc11.0.3

Signed-off-by: Tong Liu01 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h|  1 +
 drivers/gpu/drm/amd/pm/amdgpu_pm.c| 22 +++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  4 
 3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 86b6b0c9fb02..fe75497eeeab 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -104,6 +104,7 @@ enum pp_clock_type {
PP_FCLK,
PP_DCEFCLK,
PP_VCLK,
+   PP_VCLK1,
PP_DCLK,
OD_SCLK,
OD_MCLK,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index d75a67cfe523..1da6e9469450 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1180,6 +1180,21 @@ static ssize_t amdgpu_set_pp_dpm_vclk(struct device *dev,
return amdgpu_set_pp_dpm_clock(dev, PP_VCLK, buf, count);
 }
 
+static ssize_t amdgpu_get_pp_dpm_vclk1(struct device *dev,
+   struct device_attribute *attr,
+   char *buf)
+{
+   return amdgpu_get_pp_dpm_clock(dev, PP_VCLK1, buf);
+}
+
+static ssize_t amdgpu_set_pp_dpm_vclk1(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf,
+   size_t count)
+{
+   return amdgpu_set_pp_dpm_clock(dev, PP_VCLK1, buf, count);
+}
+
 static ssize_t amdgpu_get_pp_dpm_dclk(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -2002,6 +2017,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk,  
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk,  
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+   AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk1, 
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_dclk,  
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk,   
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie,  
ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
@@ -2091,6 +2107,12 @@ static int default_attr_update(struct amdgpu_device 
*adev, struct amdgpu_device_
  gc_ver == IP_VERSION(11, 0, 2) ||
  gc_ver == IP_VERSION(11, 0, 3)))
*states = ATTR_STATE_UNSUPPORTED;
+   } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
+   if (!((gc_ver == IP_VERSION(10, 3, 1) ||
+  gc_ver == IP_VERSION(10, 3, 0) ||
+  gc_ver == IP_VERSION(11, 0, 2) ||
+  gc_ver == IP_VERSION(11, 0, 3)) && 
adev->vcn.num_vcn_inst >= 2))
+   *states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
if (!(gc_ver == IP_VERSION(10, 3, 1) ||
  gc_ver == IP_VERSION(10, 3, 0) ||
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b5d64749990e..bffbef3f666d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2006,6 +2006,8 @@ static int smu_force_ppclk_levels(void *handle,
clk_type = SMU_DCEFCLK; break;
case PP_VCLK:
clk_type = SMU_VCLK; break;
+   case PP_VCLK1:
+   clk_type = SMU_VCLK1; break;
case PP_DCLK:
clk_type = SMU_DCLK; break;
case OD_SCLK:
@@ -2393,6 +2395,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum 
pp_clock_type type)
clk_type = SMU_DCEFCLK; break;
case PP_VCLK:
clk_type = SMU_VCLK; break;
+   case PP_VCLK1:
+   clk_type = SMU_VCLK1; break;
case PP_DCLK:
clk_type = SMU_DCLK; break;
case OD_SCLK:
-- 
2.34.1



Re: [PATCH] drm/amdgpu: Fix desktop freezed after gpu-reset

2023-03-28 Thread Christian König

Am 27.03.23 um 17:20 schrieb Alan Liu:

[Why]
After gpu-reset, sometimes the driver would fail to enable vblank irq,
causing flip_done timed out and the desktop freezed.

During gpu-reset, we will disable and enable vblank irq in dm_suspend()
and dm_resume(). Later on in amdgpu_irq_gpu_reset_resume_helper(), we
will check irqs' refcount and decide to enable or disable the irqs again.

However, we have two sets of APIs for controlling the vblank irq: one is
dm_vblank_get/put() and another is amdgpu_irq_get/put(). Each API has
its own refcount and flag to store the state of vblank irq, and they
are not synchronized.


This is the source of the problem and you should address this instead. 
The change you suggested below would break in some use cases.




In drm we use the first API to control the vblank irq, but in
amdgpu_irq_gpu_reset_resume_helper() we use the second set of APIs.

The failure happens when vblank irq was enabled by dm_vblank_get() before
gpu-reset, we have vblank->enabled true. However, during gpu-reset, in
amdgpu_irq_gpu_reset_resume_helper(), vblank irq's state checked from
amdgpu_irq_update() is DISABLED. So finally it will disable vblank irq
again. After gpu-reset, if there is a cursor plane commit, the driver
will try to enable vblank irq by calling drm_vblank_enable(), but the
vblank->enabled is still true, so it fails to turn on vblank irq and
causes flip_done can't be completed in vblank irq handler and desktop
become freezed.

[How]
Combining the 2 vblank control APIs by letting drm's API finally call
amdgpu_irq's API, so the irq's refcount and state of both APIs can be
synchronized. Also add a check to prevent the refcount from being less than
0 in amdgpu_irq_put().

Signed-off-by: Alan Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c|  3 +++
  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 14 ++
  2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index a6aef488a822..1b66003657e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -597,6 +597,9 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct 
amdgpu_irq_src *src,
if (!src->enabled_types || !src->funcs->set)
return -EINVAL;
  
+	if (!amdgpu_irq_enabled(adev, src, type))

+   return 0;
+


That is racy and won't work. The intention of amdgpu_irq_update() is to 
always update the irq state, no matter what the status is.


Regards,
Christian.


if (atomic_dec_and_test(&src->enabled_types[type]))
return amdgpu_irq_update(adev, src, type);
  
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c

index dc4f37240beb..e04f846b0b19 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -146,7 +146,7 @@ static void vblank_control_worker(struct work_struct *work)
  
  static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)

  {
-   enum dc_irq_source irq_source;
+   int irq_type;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
@@ -169,10 +169,16 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, 
bool enable)
if (rc)
return rc;
  
-	irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;

+   irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id);
+
+   if (enable)
+   rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type);
+
+   else
+   rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type);
  
-	if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))

-   return -EBUSY;
+   if (rc)
+   return rc;
  
  skip:

if (amdgpu_in_reset(adev))




RE: [PATCH] drm/amdgpu: correct xgmi_wafl block name

2023-03-28 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: Zhang, Hawking 
> Sent: Tuesday, March 28, 2023 6:50 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: correct xgmi_wafl block name
> 
> fix backward compatibility issue to stay with the old name of xgmi_wafl node.
> 
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 3fe24348d199..439925477fb8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -1068,7 +1068,7 @@ int amdgpu_xgmi_ras_sw_init(struct amdgpu_device
> *adev)
>   return err;
>   }
> 
> - strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl_pcs");
> + strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl");
>   ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
>   ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
>   adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
> --
> 2.17.1


[PATCH] drm/amdgpu: correct xgmi_wafl block name

2023-03-28 Thread Hawking Zhang
fix backward compatibility issue to stay with
the old name of xgmi_wafl node.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 3fe24348d199..439925477fb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1068,7 +1068,7 @@ int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
return err;
}
 
-   strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl_pcs");
+   strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl");
ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
-- 
2.17.1



Re: [PATCH 00/12] drm/amd: Remove unused variables

2023-03-28 Thread Jani Nikula
On Mon, 27 Mar 2023, Caio Novais  wrote:
> This patchset cleans the code removing unused variables and one unused
> function.
>
> Caio Novais (12):
>   Remove unused variable 'r'
>   Remove unused variable 'value0'
>   Remove unused variable 'pixel_width'
>   Remove unused variable 'hubp'
>   Remove unused variable 'speakers'
>   Remove unused variable 'mc_vm_apt_default'
>   Remove unused variable 'optc'
>   Remove two unused variables 'speakers' and 'channels' and remove
> unused function 'speakers_to_channels'
>   Remove two unused variables 'is_pipe_split_expected' and 'state'
>   Remove unused variable 'cursor_bpp'
>   Remove unused variable 'scl_enable'
>   Remove two unused variables 'result_write_min_hblank' and
> 'hblank_size'

Curious, how did you create this? It does not match the patches.

BR,
Jani.

>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  8 
>  .../amd/display/dc/dcn10/dcn10_link_encoder.c |  3 --
>  .../drm/amd/display/dc/dcn201/dcn201_dpp.c|  6 ---
>  .../drm/amd/display/dc/dcn201/dcn201_hwseq.c  |  2 -
>  .../gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c |  2 -
>  .../gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c |  4 --
>  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  3 --
>  .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c  | 39 ---
>  .../drm/amd/display/dc/dcn32/dcn32_resource.c |  4 --
>  .../display/dc/dcn32/dcn32_resource_helpers.c |  4 --
>  .../dc/dml/dcn31/display_rq_dlg_calc_31.c |  2 -
>  .../dc/link/protocols/link_dp_capability.c|  7 
>  12 files changed, 84 deletions(-)

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [PATCH 27/34] drm/amdkfd: add debug set and clear address watch points operation

2023-03-28 Thread kernel test robot
Hi Jonathan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm/drm-next]
[also build test WARNING on drm-exynos/exynos-drm-next drm-tip/drm-tip 
next-20230328]
[cannot apply to drm-misc/drm-misc-next drm-intel/for-linux-next 
drm-intel/for-linux-next-fixes linus/master v6.3-rc4]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Jonathan-Kim/drm-amdkfd-display-debug-capabilities/20230328-024632
base:   git://anongit.freedesktop.org/drm/drm drm-next
patch link:
https://lore.kernel.org/r/20230327184339.125016-27-jonathan.kim%40amd.com
patch subject: [PATCH 27/34] drm/amdkfd: add debug set and clear address watch 
points operation
config: x86_64-allyesconfig 
(https://download.01.org/0day-ci/archive/20230328/202303281754.75z2vixi-...@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# 
https://github.com/intel-lab-lkp/linux/commit/8c5e1781294f7e41d41632cb46e533c598933cd8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Jonathan-Kim/drm-amdkfd-display-debug-capabilities/20230328-024632
git checkout 8c5e1781294f7e41d41632cb46e533c598933cd8
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 olddefconfig
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash 
drivers/gpu/drm/amd/amdgpu/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202303281754.75z2vixi-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c:164:10: warning: no 
>> previous prototype for 'kgd_gfx_aldebaran_clear_address_watch' 
>> [-Wmissing-prototypes]
 164 | uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device 
*adev,
 |  ^
   In file included from drivers/gpu/drm/amd/amdgpu/../display/dc/dc_types.h:37,
from 
drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:30,
from 
drivers/gpu/drm/amd/amdgpu/../include/dm_pp_interface.h:26,
from drivers/gpu/drm/amd/amdgpu/amdgpu.h:63,
from 
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c:22:
   drivers/gpu/drm/amd/amdgpu/../display/dc/dc_hdmi_types.h:53:22: warning: 
'dp_hdmi_dongle_signature_str' defined but not used [-Wunused-const-variable=]
  53 | static const uint8_t dp_hdmi_dongle_signature_str[] = "DP-HDMI 
ADAPTOR";
 |  ^~~~
--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c:781:10: warning: no 
>> previous prototype for 'kgd_gfx_v11_clear_address_watch' 
>> [-Wmissing-prototypes]
 781 | uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
 |  ^~~
   In file included from drivers/gpu/drm/amd/amdgpu/../display/dc/dc_types.h:37,
from 
drivers/gpu/drm/amd/amdgpu/../display/dc/dm_services_types.h:30,
from 
drivers/gpu/drm/amd/amdgpu/../include/dm_pp_interface.h:26,
from drivers/gpu/drm/amd/amdgpu/amdgpu.h:63,
from drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c:23:
   drivers/gpu/drm/amd/amdgpu/../display/dc/dc_hdmi_types.h:53:22: warning: 
'dp_hdmi_dongle_signature_str' defined but not used [-Wunused-const-variable=]
  53 | static const uint8_t dp_hdmi_dongle_signature_str[] = "DP-HDMI 
ADAPTOR";
 |  ^~~~


vim +/kgd_gfx_aldebaran_clear_address_watch +164 
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c

   163  
 > 164  uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device 
 > *adev,
   165  uint32_t watch_id)
   166  {
   167  return 0;
   168  }
   169  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


RE: [PATCH v2 2/3] drm/amd/pm: send the SMT enable message to pmfw

2023-03-28 Thread Yuan, Perry
[AMD Official Use Only - General]

Hi Lijo.


> -Original Message-
> From: Lazar, Lijo 
> Sent: Monday, March 27, 2023 8:34 PM
> To: Yang, WenYou ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; Quan,
> Evan ; Limonciello, Mario
> 
> Cc: Yuan, Perry ; Li, Ying ; amd-
> g...@lists.freedesktop.org; gpicc...@igalia.com; Liu, Kun
> ; Liang, Richard qi 
> Subject: Re: [PATCH v2 2/3] drm/amd/pm: send the SMT enable message to
> pmfw
> 
> 
> 
> On 3/27/2023 12:54 PM, Wenyou Yang wrote:
> > When the CPU SMT status is changed on the fly, send the SMT enable
> > message to pmfw to notify it that the SMT status changed.
> >
> > Signed-off-by: Wenyou Yang 
> > ---
> >   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 39
> +++
> >   drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 
> >   2 files changed, 46 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > index b5d64749990e..eb4c49f38292 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > @@ -22,6 +22,7 @@
> >
> >   #define SWSMU_CODE_LAYER_L1
> >
> > +#include 
> >   #include 
> >   #include 
> >
> > @@ -54,6 +55,8 @@
> >   #undef pr_info
> >   #undef pr_debug
> >
> > +extern struct raw_notifier_head smt_notifier_head;
> > +
> >   static const struct amd_pm_funcs swsmu_pm_funcs;
> >   static int smu_force_smuclk_levels(struct smu_context *smu,
> >enum smu_clk_type clk_type,
> > @@ -69,6 +72,9 @@ static int smu_set_fan_speed_rpm(void *handle,
> uint32_t speed);
> >   static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
> >   static int smu_set_mp1_state(void *handle, enum pp_mp1_state
> > mp1_state);
> >
> > +static int smt_notifier_callback(struct notifier_block *nb,
> > +unsigned long action, void *data);
> > +
> >   static int smu_sys_get_pp_feature_mask(void *handle,
> >char *buf)
> >   {
> > @@ -647,6 +653,8 @@ static int smu_early_init(void *handle)
> > adev->powerplay.pp_handle = smu;
> > adev->powerplay.pp_funcs = &swsmu_pm_funcs;
> >
> > +   smu->nb.notifier_call = smt_notifier_callback;
> > +
> > r = smu_set_funcs(adev);
> > if (r)
> > return r;
> > @@ -1105,6 +1113,8 @@ static int smu_sw_init(void *handle)
> > if (!smu->ppt_funcs->get_fan_control_mode)
> > smu->adev->pm.no_fan = true;
> >
> > +   raw_notifier_chain_register(&smt_notifier_head, &smu->nb);
> > +
> 
> As mentioned before, it's not a blind registration for any ASIC. This should
> only be done by the ASICs which are interested in the notification and not 
> here.
> So this should be somewhere inside vangogh_set_ppt_funcs or part of a
> software init callback like vangogh_init_smc_tables.
> 
> Thanks,
> Lijo

You are right,  only Vangogh PMFW will handle the  CCLK PD Limit update 
request. 

Hi @Yang, WenYou

It will need to limit the update message within the Vangogh Asic.

> 
> > return 0;
> >   }
> >
> > @@ -1122,6 +1132,9 @@ static int smu_sw_fini(void *handle)
> >
> > smu_fini_microcode(smu);
> >
> > +   if (smu->nb.notifier_call != NULL)
> > +   raw_notifier_chain_unregister(&smt_notifier_head, &smu-
> >nb);
> > +
> > return 0;
> >   }
> >
> > @@ -3241,3 +3254,29 @@ int smu_send_hbm_bad_channel_flag(struct
> > smu_context *smu, uint32_t size)
> >
> > return ret;
> >   }
> > +
> > +static int smu_set_cpu_smt_enable(struct smu_context *smu, bool
> > +enable) {
> > +   int ret = -EINVAL;
> > +
> > +   if (smu->ppt_funcs && smu->ppt_funcs->set_cpu_smt_enable)
> > +   ret = smu->ppt_funcs->set_cpu_smt_enable(smu, enable);
> > +
> > +   return ret;
> > +}
> > +
> > +static int smt_notifier_callback(struct notifier_block *nb,
> > +unsigned long action, void *data) {
> > +   struct smu_context *smu = container_of(nb, struct smu_context, nb);
> > +   int ret;
> > +
> > +   smu = container_of(nb, struct smu_context, nb);
> > +
> > +   ret = smu_set_cpu_smt_enable(smu, action == SMT_ENABLED);
> > +
> > +   dev_dbg(smu->adev->dev, "failed to set cclk_pd_limit for
> SMT %sabled: %d\n",
> > +   action == SMT_ENABLED ? "en" : "dis", ret);
> > +
> > +   return ret ? NOTIFY_BAD : NOTIFY_OK; }
> > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > index 09469c750a96..4d51ac5ec8ba 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > @@ -566,6 +566,8 @@ struct smu_context
> >
> > struct firmware pptable_firmware;
> >
> > +   struct notifier_block nb;
> > +
> > u32 param_reg;
> > u32 msg_reg;
> > u32 resp_reg;
> > @@ -1354,6 +1356,11 @@ struct pptable_funcs {
> >  * @init_pptable_microcode: Prepare the pptable microcode to
> upload via PSP
> >  */
> > i

RE: [PATCH] drm/amdgpu: Add JPEG IP block to SRIOV reinit

2023-03-28 Thread Chen, Horace
[AMD Official Use Only - General]

Reviewed-by: Horace Chen 

-Original Message-
From: amd-gfx  On Behalf Of Yifan Zha
Sent: Tuesday, March 28, 2023 12:07 PM
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Chen, Horace ; Zhang, Hawking 
; Chang, HaiJun 
Cc: Zha, YiFan(Even) 
Subject: [PATCH] drm/amdgpu: Add JPEG IP block to SRIOV reinit

[Why]
Reset (mode1) failed as JPEG IP did not reinit under SRIOV.

[How]
Add JPEG IP block to sriov reinit function.

Signed-off-by: Yifan Zha 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 065f5396d0ce..3b6b85d9e0be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3183,7 +3183,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct 
amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_MES,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
-   AMD_IP_BLOCK_TYPE_VCN
+   AMD_IP_BLOCK_TYPE_VCN,
+   AMD_IP_BLOCK_TYPE_JPEG
};

for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
--
2.25.1