[PATCH v1 3/3] drm/amd/pm: vangogh: support to send SMT enable message

2023-03-21 Thread Wenyou Yang
Add support for sending the PPSMC_MSG_SetCClkSMTEnable (0x58) message
to pmfw for vangogh.

Signed-off-by: Wenyou Yang 
---
 .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 ++-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 ++-
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 19 +++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
index 7471e2df2828..2b182dbc6f9c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h
@@ -111,7 +111,8 @@
 #define PPSMC_MSG_GetGfxOffStatus 0x50
 #define PPSMC_MSG_GetGfxOffEntryCount 0x51
 #define PPSMC_MSG_LogGfxOffResidency  0x52
-#define PPSMC_Message_Count0x53
+#define PPSMC_MSG_SetCClkSMTEnable0x58
+#define PPSMC_Message_Count0x54
 
 //Argument for PPSMC_MSG_GfxDeviceDriverReset
 enum {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..820812d910bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(SetCClkSMTEnable),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 7433dcaa16e0..f0eeb42df96b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -141,6 +141,7 @@ static struct cmn2asic_msg_mapping 
vangogh_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(GetGfxOffStatus,PPSMC_MSG_GetGfxOffStatus,  
0),
MSG_MAP(GetGfxOffEntryCount,
PPSMC_MSG_GetGfxOffEntryCount,  0),
MSG_MAP(LogGfxOffResidency, 
PPSMC_MSG_LogGfxOffResidency,   0),
+   MSG_MAP(SetCClkSMTEnable,   PPSMC_MSG_SetCClkSMTEnable, 
0),
 };
 
 static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = {
@@ -2428,6 +2429,23 @@ static u32 vangogh_get_gfxoff_entrycount(struct 
smu_context *smu, uint64_t *entr
return ret;
 }
 
+static int vangogh_set_cpu_smt_enable(struct smu_context *smu, bool enable)
+{
+   int ret = 0;
+
+   if (enable) {
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetCClkSMTEnable,
+ 1, NULL);
+   } else {
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetCClkSMTEnable,
+ 0, NULL);
+   }
+
+   return ret;
+}
+
 static const struct pptable_funcs vangogh_ppt_funcs = {
 
.check_fw_status = smu_v11_0_check_fw_status,
@@ -2474,6 +2492,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
.get_power_limit = vangogh_get_power_limit,
.set_power_limit = vangogh_set_power_limit,
.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
+   .set_cpu_smt_enable = vangogh_set_cpu_smt_enable,
 };
 
 void vangogh_set_ppt_funcs(struct smu_context *smu)
-- 
2.39.2



[PATCH v1 1/3] cpu/smt: add a notifier to notify the SMT changes

2023-03-21 Thread Wenyou Yang
Add a notifier chain to notify listeners of CPU SMT status changes.

Signed-off-by: Wenyou Yang 
---
 include/linux/cpu.h |  5 +
 kernel/cpu.c| 11 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 314802f98b9d..9a842317fe2d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -213,6 +213,11 @@ enum cpuhp_smt_control {
CPU_SMT_NOT_IMPLEMENTED,
 };
 
+enum cpuhp_smt_status {
+   SMT_ENABLED,
+   SMT_DISABLED,
+};
+
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6c0a92ca6bb5..accae0fa9868 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -89,6 +89,9 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
 cpumask_t cpus_booted_once_mask;
 #endif
 
+RAW_NOTIFIER_HEAD(smt_notifier_head);
+EXPORT_SYMBOL(smt_notifier_head);
+
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
 static struct lockdep_map cpuhp_state_up_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
@@ -2281,8 +2284,10 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 */
cpuhp_offline_cpu_device(cpu);
}
-   if (!ret)
+   if (!ret) {
cpu_smt_control = ctrlval;
+   raw_notifier_call_chain(&smt_notifier_head, SMT_DISABLED, NULL);
+   }
cpu_maps_update_done();
return ret;
 }
@@ -2303,7 +2308,11 @@ int cpuhp_smt_enable(void)
/* See comment in cpuhp_smt_disable() */
cpuhp_online_cpu_device(cpu);
}
+   if (!ret)
+   raw_notifier_call_chain(&smt_notifier_head, SMT_ENABLED, NULL);
+
cpu_maps_update_done();
+
return ret;
 }
 #endif
-- 
2.39.2



[PATCH v1 2/3] drm/amd/pm: send the SMT-enable message to pmfw

2023-03-21 Thread Wenyou Yang
When the CPU SMT status changes on the fly, send the SMT-enable
message to pmfw to notify it that the SMT status has changed.

Signed-off-by: Wenyou Yang 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 41 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  5 +++
 2 files changed, 46 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b5d64749990e..5cd85a9d149d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -22,6 +22,7 @@
 
 #define SWSMU_CODE_LAYER_L1
 
+#include 
 #include 
 #include 
 
@@ -69,6 +70,14 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t 
speed);
 static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
 static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
 
+static int smt_notifier_callback(struct notifier_block *nb, unsigned long 
action, void *data);
+
+extern struct raw_notifier_head smt_notifier_head;
+
+static struct notifier_block smt_notifier = {
+   .notifier_call = smt_notifier_callback,
+};
+
 static int smu_sys_get_pp_feature_mask(void *handle,
   char *buf)
 {
@@ -625,6 +634,8 @@ static int smu_set_funcs(struct amdgpu_device *adev)
return 0;
 }
 
+static struct smu_context *current_smu;
+
 static int smu_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -645,6 +656,7 @@ static int smu_early_init(void *handle)
mutex_init(&smu->message_lock);
 
adev->powerplay.pp_handle = smu;
+   current_smu = smu;
adev->powerplay.pp_funcs = _pm_funcs;
 
r = smu_set_funcs(adev);
@@ -1105,6 +1117,8 @@ static int smu_sw_init(void *handle)
if (!smu->ppt_funcs->get_fan_control_mode)
smu->adev->pm.no_fan = true;
 
+   raw_notifier_chain_register(&smt_notifier_head, &smt_notifier);
+
return 0;
 }
 
@@ -1122,6 +1136,8 @@ static int smu_sw_fini(void *handle)
 
smu_fini_microcode(smu);
 
+   raw_notifier_chain_unregister(&smt_notifier_head, &smt_notifier);
+
return 0;
 }
 
@@ -3241,3 +3257,28 @@ int smu_send_hbm_bad_channel_flag(struct smu_context 
*smu, uint32_t size)
 
return ret;
 }
+
+static int smu_set_cpu_smt_enable(struct smu_context *smu, bool enable)
+{
+   int ret = -EINVAL;
+
+   if (smu->ppt_funcs && smu->ppt_funcs->set_cpu_smt_enable)
+   ret = smu->ppt_funcs->set_cpu_smt_enable(smu, enable);
+
+   return ret;
+}
+
+static int smt_notifier_callback(struct notifier_block *nb,
+unsigned long action, void *data)
+{
+   struct smu_context *smu = current_smu;
+   int ret = NOTIFY_OK;
+
+   ret = (action == SMT_ENABLED) ?
+   smu_set_cpu_smt_enable(smu, true) :
+   smu_set_cpu_smt_enable(smu, false);
+   if (ret)
+   ret = NOTIFY_BAD;
+
+   return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 09469c750a96..7c6594bba796 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1354,6 +1354,11 @@ struct pptable_funcs {
 * @init_pptable_microcode: Prepare the pptable microcode to upload via 
PSP
 */
int (*init_pptable_microcode)(struct smu_context *smu);
+
+   /**
+* @set_cpu_smt_enable: Set the CPU SMT status
+*/
+   int (*set_cpu_smt_enable)(struct smu_context *smu, bool enable);
 };
 
 typedef enum {
-- 
2.39.2



[PATCH v1 0/3] send message to pmfw when SMT changes

2023-03-21 Thread Wenyou Yang
When the CPU SMT status changes on the fly, send a message to pmfw
to notify it that the SMT status has changed.

Wenyou Yang (3):
  cpu/smt: add a notifier to notify the SMT changes
  drm/amd/pm: send the SMT-enable message to pmfw
  drm/amd/pm: vangogh: support to send SMT enable message

 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 41 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  5 +++
 .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h|  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 19 +
 include/linux/cpu.h   |  5 +++
 kernel/cpu.c  | 11 -
 7 files changed, 84 insertions(+), 3 deletions(-)

-- 
2.39.2



[PATCH] drm/amd/display: Clean up some inconsistent indenting

2023-03-21 Thread Jiapeng Chong
No functional modification involved.

Reported-by: Abaci Robot 
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4585
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c 
b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index fa469de3e935..0d3a983cb9ec 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -758,8 +758,8 @@ bool dmcu_load_iram(struct dmcu *dmcu,
 
if (dmcu->dmcu_version.abm_version == 0x24) {
fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params, 
true);
-   result = dmcu->funcs->load_iram(
-   dmcu, 0, (char *)(&ram_table), 
IRAM_RESERVE_AREA_START_V2_2);
+   result = dmcu->funcs->load_iram(dmcu, 0, (char *)(&ram_table),
+   IRAM_RESERVE_AREA_START_V2_2);
} else if (dmcu->dmcu_version.abm_version == 0x23) {
fill_iram_v_2_3((struct iram_table_v_2_2 *)ram_table, params, 
true);
 
-- 
2.20.1.7.g153144c



[PATCH] drm/amd/display: Remove the unused variable dppclk_delay_subtotal

2023-03-21 Thread Jiapeng Chong
Variable dppclk_delay_subtotal is not effectively used, so delete it.

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn314/display_rq_dlg_calc_314.c:1004:15:
 warning: variable 'dppclk_delay_subtotal' set but not used.

Reported-by: Abaci Robot 
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4584
Signed-off-by: Jiapeng Chong 
---
 .../display/dc/dml/dcn314/display_rq_dlg_calc_314.c| 10 --
 1 file changed, 10 deletions(-)

diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
index 6576b897a512..d1c2693a2e28 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -1001,7 +1001,6 @@ static void dml_rq_dlg_get_dlg_params(
unsigned int vupdate_width;
unsigned int vready_offset;
 
-   unsigned int dppclk_delay_subtotal;
unsigned int dispclk_delay_subtotal;
 
unsigned int vstartup_start;
@@ -1130,17 +1129,8 @@ static void dml_rq_dlg_get_dlg_params(
vupdate_offset = dst->vupdate_offset;
vupdate_width = dst->vupdate_width;
vready_offset = dst->vready_offset;
-
-   dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
 
-   if (scl_enable)
-   dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
-   else
-   dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
-
-   dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + 
src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
-
if (dout->dsc_enable) {
double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, 
num_pipes, pipe_idx); // FROM VBA
 
-- 
2.20.1.7.g153144c



RE: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

2023-03-21 Thread Chen, Guchun



> -Original Message-
> From: Tong Liu01 
> Sent: Wednesday, March 22, 2023 10:37 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Quan, Evan ; Chen, Horace
> ; Tuikov, Luben ;
> Koenig, Christian ; Deucher, Alexander
> ; Xiao, Jack ; Zhang,
> Hawking ; Liu, Monk ; Xu,
> Feifei ; Wang, Yang(Kevin)
> ; Chen, Guchun ;
> Liu01, Tong (Esther) 
> Subject: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded
> 
> [why]
> For Navi12 and CHIP_SIENNA_CICHLID SRIOV, TMR is not loaded. Should also
> skip tmr unload
> 
> Signed-off-by: Tong Liu01 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 0b9e99c35a05..0a3d9f7e277b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -839,7 +839,15 @@ static void psp_prep_tmr_unload_cmd_buf(struct
> psp_context *psp,  static int psp_tmr_unload(struct psp_context *psp)  {
>   int ret;
> - struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
> + struct psp_gfx_cmd_resp *cmd;
> +
> + /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
> +  * Already set up by host driver.
> +  */

I would say something like "skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID 
SRIOV, as TMR is not loaded at all."

With above comment improved, the patch is:
Reviewed-by: Guchun Chen 

Regards,
Guchun

> + if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
> + return 0;
> +
> + cmd = acquire_psp_cmd_buf(psp);
> 
>   psp_prep_tmr_unload_cmd_buf(psp, cmd);
>   dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
> --
> 2.34.1



[PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

2023-03-21 Thread Tong Liu01
[why]
For Navi12 and CHIP_SIENNA_CICHLID SRIOV, TMR is not loaded. Should
also skip tmr unload

Signed-off-by: Tong Liu01 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b9e99c35a05..0a3d9f7e277b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -839,7 +839,15 @@ static void psp_prep_tmr_unload_cmd_buf(struct psp_context 
*psp,
 static int psp_tmr_unload(struct psp_context *psp)
 {
int ret;
-   struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+   struct psp_gfx_cmd_resp *cmd;
+
+   /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+* Already set up by host driver.
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
+   cmd = acquire_psp_cmd_buf(psp);
 
psp_prep_tmr_unload_cmd_buf(psp, cmd);
dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
-- 
2.34.1



RE: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

2023-03-21 Thread Chen, Guchun
[Public]

 struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);

+   /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+* Already set up by host driver.
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;

This will lead to a psp lock leak by acquire_psp_cmd_buf. It needs to put 'cmd 
= acquire_psp_cmd_buf(psp);' after SRIOV check.

Regards,
Guchun

From: amd-gfx  On Behalf Of Deucher, 
Alexander
Sent: Tuesday, March 21, 2023 10:30 PM
To: Liu01, Tong (Esther) ; amd-gfx@lists.freedesktop.org
Cc: Xiao, Jack ; Chen, Horace ; Tuikov, 
Luben ; Quan, Evan ; Koenig, Christian 
; Zhang, Hawking 
Subject: Re: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded


[Public]


[Public]

Reviewed-by: Alex Deucher 
mailto:alexander.deuc...@amd.com>>

From: Tong Liu01 mailto:tong.li...@amd.com>>
Sent: Tuesday, March 21, 2023 5:19 AM
To: amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>
Cc: Quan, Evan mailto:evan.q...@amd.com>>; Chen, Horace 
mailto:horace.c...@amd.com>>; Tuikov, Luben 
mailto:luben.tui...@amd.com>>; Koenig, Christian 
mailto:christian.koe...@amd.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>; Xiao, Jack 
mailto:jack.x...@amd.com>>; Zhang, Hawking 
mailto:hawking.zh...@amd.com>>; Liu01, Tong (Esther) 
mailto:tong.li...@amd.com>>
Subject: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

[why]
For Navi12 and CHIP_SIENNA_CICHLID SRIOV, TMR is not loaded. Should
also skip tmr unload

Signed-off-by: Tong Liu01 mailto:tong.li...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b9e99c35a05..69addf2751aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -841,6 +841,12 @@ static int psp_tmr_unload(struct psp_context *psp)
 int ret;
 struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);

+   /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+* Already set up by host driver.
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
 psp_prep_tmr_unload_cmd_buf(psp, cmd);
 dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");

--
2.34.1


Re: [PATCH 25/32] drm/amdkfd: add debug suspend and resume process queues operation

2023-03-21 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

In order to inspect waves from the saved context at any point during a
debug session, the debugger must be able to preempt queues to trigger
context save by suspending them.

On queue suspend, the KFD will copy the context save header information
so that the debugger can correctly crawl the appropriate size of the saved
context. The debugger must then also be allowed to resume suspended queues.

A queue that is newly created cannot be suspended because queue ids are
recycled after destruction so the debugger needs to know that this has
occurred.  Query functions will be later added that will clear a given
queue of its new queue status.

A queue cannot be destroyed while it is suspended to preserve its saved
context during debugger inspection.  Have queue destruction block while
a queue is suspended and unblocked when it is resumed.  Likewise, if a
queue is about to be destroyed, it cannot be suspended.

Return the number of queues successfully suspended or resumed along with
a per queue status array where the upper bits per queue status show that
the request was invalid (new/destroyed queue suspend request, missing
queue) or an error occurred (HWS in a fatal state so it can't suspend or
resume queues).

v2: add gfx11/mes support.
prevent header copy on suspend from overwriting user fields.
simplify resume_queues function.
address other nit-picks

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   5 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   1 +
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  11 +
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   7 +
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 446 +-
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  10 +
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  14 +
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  |  11 +-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  18 +-
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   5 +-
  10 files changed, 518 insertions(+), 10 deletions(-)


[snip]

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 50da16dd4c96..047c43418a1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -288,6 +288,11 @@ static int get_wave_state(struct mqd_manager *mm, void 
*mqd,
  u32 *save_area_used_size)
  {
struct v9_mqd *m;
+   struct kfd_context_save_area_header header;
+   size_t header_copy_size = sizeof(header.control_stack_size) +
+   sizeof(header.wave_state_size) +
+   sizeof(header.wave_state_offset) +
+   sizeof(header.control_stack_offset);


This makes assumptions about the structure layout. I'd feel better if 
these fields were in a sub-structure, which would make this easier and 
safer to handle.


struct kfd_context_save_area_header {
struct {
__u32 control_stack_offset;
__u32 control_stack_size;
__u32 wave_state_offset;
__u32 wave_state_size;
} wave_state;
...
};

...

static int get_wave_state(...)
{
	struct kfd_context_save_area_header header;
	...
	header.wave_state.control_stack_size = *ctl_stack_used_size;
	header.wave_state.wave_state_size = *save_area_used_size;
	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;

	if (copy_to_user(ctl_stack, &header.wave_state, sizeof(header.wave_state)))
		return -EFAULT;
	...
}


This way you're sure you only copy initialized data. The only assumption 
this still makes is, that wave_state is at the start of the header 
structure.


Regards,
  Felix


  
  	/* Control stack is located one page after MQD. */

void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
@@ -299,7 +304,18 @@ static int get_wave_state(struct mqd_manager *mm, void 
*mqd,
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
  
-	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))

+   header.control_stack_size = *ctl_stack_used_size;
+   header.wave_state_size = *save_area_used_size;
+
+   header.wave_state_offset = m->cp_hqd_wg_state_offset;
+   header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+   if (copy_to_user(ctl_stack, &header, header_copy_size))
+   return -EFAULT;
+
+   if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
+   mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
+   *ctl_stack_used_size))
return -EFAULT;
  
  	return 0;

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6f7dc23af104..8dc7cc1e18a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ 

Re: [PATCH 24/32] drm/amdkfd: add debug wave launch mode operation

2023-03-21 Thread Felix Kuehling

On 2023-01-25 14:53, Jonathan Kim wrote:

Allow the debugger to set wave behaviour to either operate normally,
halt at launch, trap on every instruction, terminate immediately or
stall on allocation.

v2: add gfx11 support and remove deprecated launch mode options

Signed-off-by: Jonathan Kim 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  | 12 +++
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  1 +
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 25 +
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  3 ++
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  3 +-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c| 14 +++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 25 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  3 ++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  3 ++
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 36 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  5 ++-
  11 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 84a9d9391ea4..4de2066215b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -107,6 +107,17 @@ static uint32_t 
kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
return data;
  }
  
+static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,

+   uint8_t wave_launch_mode,
+   uint32_t vmid)
+{
+   uint32_t data = 0;
+
+   data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, 
wave_launch_mode);
+
+   return data;
+}
+
  const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -128,6 +139,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
.validate_trap_override_request = 
kgd_aldebaran_validate_trap_override_request,
.set_wave_launch_trap_override = 
kgd_aldebaran_set_wave_launch_trap_override,
+   .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 0405725e95e3..500013540356 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -412,6 +412,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.disable_debug_trap = kgd_arcturus_disable_debug_trap,
.validate_trap_override_request = 
kgd_gfx_v9_validate_trap_override_request,
.set_wave_launch_trap_override = 
kgd_gfx_v9_set_wave_launch_trap_override,
+   .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 32a6e5fbeacd..7591145bc69f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -854,6 +854,30 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct 
amdgpu_device *adev,
return 0;
  }
  
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,

+   uint8_t wave_launch_mode,
+   uint32_t vmid)
+{
+   uint32_t data = 0;
+   bool is_mode_set = !!wave_launch_mode;
+
+   mutex_lock(&adev->grbm_idx_mutex);
+
+   kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+   data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+   VMID_MASK, is_mode_set ? 1 << vmid : 0);
+   data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+   MODE, is_mode_set ? wave_launch_mode : 0);
+   WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+   kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+   mutex_unlock(&adev->grbm_idx_mutex);
+
+   return 0;
+}
+
  /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
   * The values read are:
   * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
@@ -941,6 +965,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = 

Re: [PATCH 23/32] drm/amdkfd: add debug wave launch override operation

2023-03-21 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

This operation allows the debugger to override the enabled HW
exceptions on the device.

On debug devices that only support the debugging of a single process,
the HW exceptions are global and set through the SPI_GDBG_TRAP_MASK
register.
Because they are global, only address watch exceptions are allowed to
be enabled.  In other words, the debugger must preserve all non-address
watch exception states in normal mode operation by barring a full
replacement override or a non-address watch override request.

For multi-process debugging, all HW exception overrides are per-VMID so
all exceptions can be overridden or fully replaced.

In order for the debugger to know what is permissible, return the
supported override mask back to the debugger along with the previously
enabled overrides.

v3: v2 was reviewed, but requesting re-review for the added GFX11 support.

v2: switch unsupported override mode return from EPERM to EINVAL to
support unique EPERM on PTRACE failure.

Signed-off-by: Jonathan Kim 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  | 47 ++
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 55 
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h| 10 +++
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  5 +-
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c| 86 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 55 
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 10 +++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  7 ++
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 69 +++
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  6 ++
  11 files changed, 350 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index a64a53f9efe6..84a9d9391ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -25,6 +25,7 @@
  #include "amdgpu_amdkfd_gfx_v9.h"
  #include "gc/gc_9_4_2_offset.h"
  #include "gc/gc_9_4_2_sh_mask.h"
+#include 
  
  /**

   * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
@@ -62,6 +63,50 @@ static uint32_t kgd_aldebaran_disable_debug_trap(struct 
amdgpu_device *adev,
return data;
  }
  
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,

+   uint32_t trap_override,
+   uint32_t 
*trap_mask_supported)
+{
+   *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+   KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+   KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+   KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+   KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+   KFD_DBG_TRAP_MASK_FP_INEXACT |
+   KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+   KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+   KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+   if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+   trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+   return -EPERM;
+
+   return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct 
amdgpu_device *adev,
+   uint32_t vmid,
+   uint32_t trap_override,
+   uint32_t trap_mask_bits,
+   uint32_t trap_mask_request,
+   uint32_t *trap_mask_prev,
+   uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+   uint32_t data = 0;
+
+   *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, 
SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+   trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+   (*trap_mask_prev & ~trap_mask_request);
+
+   data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+   data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 
trap_mask_bits);
+   data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 
trap_override);
+
+   return data;
+}
+
  const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -81,6 +126,8 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+   

Re: [PATCH 21/32] drm/amdkfd: update process interrupt handling for debug events

2023-03-21 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

The debugger must be notified of any debugger-subscribed exception
that comes from hardware interrupts.

If a debugger session exits, any exceptions it subscribed to may still
have interrupts in the interrupt ring buffer or KGD/KFD pipeline.
To prevent a new session from inheriting stale interrupts, when a new
queue is created, open an interrupt drain and allow the IH ring to drain
from a timestamped checkpoint.  Then inject a custom IV so that once
the custom IV is picked up by the KFD, it's safe to close the drain
and proceed with queue creation.

The drain must also be on debug disable as SW interrupts may still
be processed.  Drain at this time and clear all the exception status.

The debugger may also not be attached nor subscribed to certain
exceptions so forward them directly to the runtime.

GFX10 also requires its own IV processing, hence the creation of
kfd_int_process_v10.c.  This is because the IV from SQ interrupts are
packed into a new contiguous format unlike GFX9. To make this clear,
a separate interrupt handling code file was created.

v3: enable gfx11 interrupts
v2: fix interrupt drain on debug disable.
fix interrupt drain on queue create during -ERESTARTSYS.
fix up macros naming for ECODE parsing.

Signed-off-by: Jonathan Kim 


Some indentation nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  16 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   2 +
  drivers/gpu/drm/amd/amdkfd/Makefile   |   1 +
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c|  85 
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|   6 +
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   4 +-
  .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c  | 405 ++
  .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c  |  21 +-
  .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  98 -
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  12 +
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  47 ++
  .../amd/amdkfd/kfd_process_queue_manager.c|   4 +
  12 files changed, 681 insertions(+), 20 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8816853e50c0..60c3b0449d86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -763,6 +763,22 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct 
amdgpu_device *adev, bo
amdgpu_umc_poison_handler(adev, reset);
  }
  
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,

+   uint32_t *payload)
+{
+   int ret;
+
+   /* Device or IH ring is not ready so bail. */
+   ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+   if (ret)
+   return ret;
+
+   /* Send payload to fence KFD interrupts */
+   amdgpu_amdkfd_interrupt(adev, payload);
+
+   return 0;
+}
+
  bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
  {
if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 333780491867..df782274a4c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -241,6 +241,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct 
amdgpu_device *dst,
struct amdgpu_device *src,
bool is_min);
  int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool 
is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+   uint32_t *payload);
  
  /* Read user wptr from a specified user address space with page fault

   * disabled. The memory must be pinned and mapped to the hardware when
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 747754428073..2ec8f27c5366 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -53,6 +53,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
+   $(AMDKFD_PATH)/kfd_int_process_v10.o \
$(AMDKFD_PATH)/kfd_int_process_v11.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 16acf3d416eb..0c876172db4b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -125,6 +125,65 @@ bool kfd_dbg_ev_raise(uint64_t 

Re: [PATCH 07/11] drm/amdgpu: add UAPI to query GFX shadow sizes

2023-03-21 Thread Alex Deucher
On Mon, Mar 20, 2023 at 8:31 PM Marek Olšák  wrote:
>
> On Mon, Mar 20, 2023 at 1:38 PM Alex Deucher  
> wrote:
>>
>> Add UAPI to query the GFX shadow buffer requirements
>> for preemption on GFX11.  UMDs need to specify the shadow
>> areas for preemption.
>>
>> Signed-off-by: Alex Deucher 
>> ---
>>  include/uapi/drm/amdgpu_drm.h | 10 ++
>>  1 file changed, 10 insertions(+)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 3d9474af6566..19a806145371 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -886,6 +886,7 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
>> #define AMDGPU_INFO_VIDEO_CAPS_DECODE   0
>> /* Subquery id: Encode */
>> #define AMDGPU_INFO_VIDEO_CAPS_ENCODE   1
>> +#define AMDGPU_INFO_CP_GFX_SHADOW_SIZE 0x22
>
>
> Can you put this into the device structure instead? Let's minimize the number 
> of kernel queries as much as possible.

I guess, but one nice thing about this is that we can use the query as
a way to determine if the kernel supports this functionality or not.
If not, the query returns -ENOTSUP.

Alex


>
> Thanks,
> Marek
>


Re: [PATCH 07/11] drm/amdgpu: add UAPI to query GFX shadow sizes

2023-03-21 Thread Alex Deucher
On Mon, Mar 20, 2023 at 8:30 PM Marek Olšák  wrote:
>
>
> On Mon, Mar 20, 2023 at 1:38 PM Alex Deucher  
> wrote:
>>
>> Add UAPI to query the GFX shadow buffer requirements
>> for preemption on GFX11.  UMDs need to specify the shadow
>> areas for preemption.
>>
>> Signed-off-by: Alex Deucher 
>> ---
>>  include/uapi/drm/amdgpu_drm.h | 10 ++
>>  1 file changed, 10 insertions(+)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 3d9474af6566..19a806145371 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -886,6 +886,7 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
>> #define AMDGPU_INFO_VIDEO_CAPS_DECODE   0
>> /* Subquery id: Encode */
>> #define AMDGPU_INFO_VIDEO_CAPS_ENCODE   1
>> +#define AMDGPU_INFO_CP_GFX_SHADOW_SIZE 0x22
>>
>>  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
>>  #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
>> @@ -1203,6 +1204,15 @@ struct drm_amdgpu_info_video_caps {
>> struct drm_amdgpu_info_video_codec_info 
>> codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
>>  };
>>
>> +struct drm_amdgpu_info_cp_gfx_shadow_size {
>> +   __u32 shadow_size;
>> +   __u32 shadow_alignment;
>> +   __u32 csa_size;
>> +   __u32 csa_alignment;
>> +   __u32 gds_size;
>> +   __u32 gds_alignment;
>
>
> Can you document the fields? What is CSA? Also, why is GDS there when the hw 
> deprecated it and replaced it with GDS registers?

Will add documentation.  For reference:
CSA (Context Save Area) - used as a scratch area for FW for saving
various things
Shadow - stores the pipeline state
GDS backup - stores the GDS state used by the pipeline.  I'm not sure
if this is registers or the old GDS memory.  Presumably the former.

Alex

>
> Thanks,
> Marek


[PATCH] drm/amd/display: Slightly optimize dm_dmub_outbox1_low_irq()

2023-03-21 Thread Christophe JAILLET
A kzalloc()+memcpy() can be optimized in a single kmemdup().
This saves a few cycles because some memory doesn't need to be zeroed.

Signed-off-by: Christophe JAILLET 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 5bac5781a06b..57a5fbdab890 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -820,15 +820,14 @@ static void dm_dmub_outbox1_low_irq(void 
*interrupt_params)
DRM_ERROR("Failed to allocate 
dmub_hpd_wrk");
return;
}
-   dmub_hpd_wrk->dmub_notify = 
kzalloc(sizeof(struct dmub_notification), GFP_ATOMIC);
+   dmub_hpd_wrk->dmub_notify = kmemdup(&notify, 
sizeof(struct dmub_notification),
+   GFP_ATOMIC);
if (!dmub_hpd_wrk->dmub_notify) {
kfree(dmub_hpd_wrk);
DRM_ERROR("Failed to allocate 
dmub_hpd_wrk->dmub_notify");
return;
}
INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, 
dm_handle_hpd_work);
-   if (dmub_hpd_wrk->dmub_notify)
-   memcpy(dmub_hpd_wrk->dmub_notify, 
&notify, sizeof(struct dmub_notification));
dmub_hpd_wrk->adev = adev;
if (notify.type == DMUB_NOTIFICATION_HPD) {
plink = 
adev->dm.dc->links[notify.link_index];
-- 
2.32.0



RE: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-21 Thread Sider, Graham
[Public]

> -Original Message-
> From: Christian König 
> Sent: Tuesday, March 21, 2023 2:53 PM
> To: Sider, Graham ; Russell, Kent
> ; Mahfooz, Hamza ;
> amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode
> 
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
> 
> 
> Am 17.03.23 um 21:04 schrieb Sider, Graham:
> > [AMD Official Use Only - General]
> >
> >
> >
> >> -Original Message-
> >> From: Russell, Kent 
> >> Sent: Friday, March 17, 2023 3:58 PM
> >> To: Mahfooz, Hamza ; Sider, Graham
> >> ; amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix 
> >> Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode
> >>
> >> [AMD Official Use Only - General]
> >>
> >>
> >>
> >>> -Original Message-
> >>> From: amd-gfx  On Behalf Of
> >>> Hamza Mahfooz
> >>> Sent: Friday, March 17, 2023 3:58 PM
> >>> To: Sider, Graham ;
> >>> amd-gfx@lists.freedesktop.org
> >>> Cc: Kuehling, Felix 
> >>> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation
> >>> mode
> >>>
> >>>
> >>> On 3/17/23 15:47, Graham Sider wrote:
>  Add log to display whether RAM is direct vs DMA mapped.
> 
>  Signed-off-by: Graham Sider 
> >>> If this information is only useful for debugging purposes, please
> >>> use
> >>> drm_dbg() instead of pr_info().
> > It's useful for more than just debug I would say. Just a quick way to grep
> whether IOMMU is off/pt vs device isolation mode.
> 
> Mhm, shouldn't the IOMMU code note that as well?
>

As of right now, not exactly. Copy-pasting Felix's comment here:

The kernel log [currently] tells you the default IOMMU domain, but it may not 
match the domain actually used for the GPU. Without this message there is no 
easy way to tell from a kernel log. This will help with triaging issues from 
logs provided by external and internal users.

Graham

> 
> Christian.
> 
> >
> > Graham
> >
>  ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> 1 file changed, 5 insertions(+), 1 deletion(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  index 8bba5e6872a1..8797a9523244 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  @@ -3528,8 +3528,12 @@ static void
> >>> amdgpu_device_check_iommu_direct_map(struct amdgpu_device
> *adev)
> struct iommu_domain *domain;
> 
> domain = iommu_get_domain_for_dev(adev->dev);
>  -  if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
>  +  if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
>  +  pr_info("RAM is direct mapped to GPU (not traslated by
> >> traslated -> translated
> >>
> > Thanks, my keyboard keeps skipping the on the 'n' key lately :( time for a
> clean.
> >
> > Graham
> >
> >>   Kent
> >>> IOMMU)\n");
> adev->ram_is_direct_mapped = true;
>  +  } else {
>  +  pr_info("RAM is DMA mapped to GPU (translated by
> >>> IOMMU)\n");
>  +  }
> }
> 
> static const struct attribute *amdgpu_dev_attributes[] = {
> >>> --
> >>> Hamza


[PATCH 2/2] drm/amdgpu: add debugfs interface for reading MQDs

2023-03-21 Thread Alex Deucher
Provide a debugfs interface to access the MQD.  Useful for
debugging issues with the CP and MES hardware scheduler.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 60 +++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index dc474b809604..4da67faef668 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -478,6 +478,59 @@ static const struct file_operations 
amdgpu_debugfs_ring_fops = {
.llseek = default_llseek
 };
 
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+  size_t size, loff_t *pos)
+{
+   struct amdgpu_ring *ring = file_inode(f)->i_private;
+   volatile u32 *mqd;
+   int r;
+   uint32_t value, result;
+
+   if (*pos & 3 || size & 3)
+   return -EINVAL;
+
+   result = 0;
+
+   r = amdgpu_bo_reserve(ring->mqd_obj, false);
+   if (unlikely(r != 0))
+   return r;
+
+   r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+   if (r) {
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   return r;
+   }
+
+   while (size) {
+   if (*pos >= ring->mqd_size)
+   return result;
+
+   value = mqd[*pos/4];
+   r = put_user(value, (uint32_t *)buf);
+   if (r)
+   goto done;
+   buf += 4;
+   result += 4;
+   size -= 4;
+   *pos += 4;
+   }
+
+done:
+   amdgpu_bo_kunmap(ring->mqd_obj);
+   mqd = NULL;
+   amdgpu_bo_unreserve(ring->mqd_obj);
+   if (r)
+   return r;
+
+   return result;
+}
+
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+   .owner = THIS_MODULE,
+   .read = amdgpu_debugfs_mqd_read,
+   .llseek = default_llseek
+};
+
 #endif
 
 void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@@ -492,7 +545,12 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
 &amdgpu_debugfs_ring_fops,
 ring->ring_size + 12);
-
+   if (ring->mqd_obj) {
+   sprintf(name, "amdgpu_mqd_%s", ring->name);
+   debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
+&amdgpu_debugfs_mqd_fops,
+ring->mqd_size);
+   }
 #endif
 }
 
-- 
2.39.2



[PATCH 1/2] drm/amdgpu: track MQD size for gfx and compute

2023-03-21 Thread Alex Deucher
It varies by generation and we need to know the size
to expose this via debugfs.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index c50d59855011..5435f41a3b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -404,6 +404,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
return r;
}
 
+   ring->mqd_size = mqd_size;
/* prepare MQD backup */
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, 
GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i])
@@ -424,6 +425,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
return r;
}
 
+   ring->mqd_size = mqd_size;
/* prepare MQD backup */
adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, 
GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[i])
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7942cb62e52c..deb9f7bead02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -257,6 +257,7 @@ struct amdgpu_ring {
struct amdgpu_bo*mqd_obj;
uint64_tmqd_gpu_addr;
void*mqd_ptr;
+   unsignedmqd_size;
uint64_teop_gpu_addr;
u32 doorbell_index;
booluse_doorbell;
-- 
2.39.2



Re: [RFC PATCH 1/5] x86/xen: disable swiotlb for xen pvh

2023-03-21 Thread Christian König

Am 17.03.23 um 15:45 schrieb Alex Deucher:

On Thu, Mar 16, 2023 at 7:09 PM Stefano Stabellini
 wrote:

On Thu, 16 Mar 2023, Juergen Gross wrote:

On 16.03.23 14:53, Alex Deucher wrote:

On Thu, Mar 16, 2023 at 9:48 AM Juergen Gross  wrote:

On 16.03.23 14:45, Alex Deucher wrote:

On Thu, Mar 16, 2023 at 3:50 AM Jan Beulich  wrote:

On 16.03.2023 00:25, Stefano Stabellini wrote:

On Wed, 15 Mar 2023, Jan Beulich wrote:

On 15.03.2023 01:52, Stefano Stabellini wrote:

On Mon, 13 Mar 2023, Jan Beulich wrote:

On 12.03.2023 13:01, Huang Rui wrote:

Xen PVH is the paravirtualized mode and takes advantage of
hardware
virtualization support when possible. It will use the
hardware IOMMU
support instead of xen-swiotlb, so disable swiotlb if
current domain is
Xen PVH.

But the kernel has no way (yet) to drive the IOMMU, so how can
it get
away without resorting to swiotlb in certain cases (like I/O
to an
address-restricted device)?

I think Ray meant that, thanks to the IOMMU setup by Xen, there
is no
need for swiotlb-xen in Dom0. Address translations are done by
the IOMMU
so we can use guest physical addresses instead of machine
addresses for
DMA. This is a similar case to Dom0 on ARM when the IOMMU is
available
(see include/xen/arm/swiotlb-xen.h:xen_swiotlb_detect, the
corresponding
case is XENFEAT_not_direct_mapped).

But how does Xen using an IOMMU help with, as said,
address-restricted
devices? They may still need e.g. a 32-bit address to be
programmed in,
and if the kernel has memory beyond the 4G boundary not all I/O
buffers
may fulfill this requirement.

In short, it is going to work as long as Linux has guest physical
addresses (not machine addresses, those could be anything) lower
than
4GB.

If the address-restricted device does DMA via an IOMMU, then the
device
gets programmed by Linux using its guest physical addresses (not
machine
addresses).

The 32-bit restriction would be applied by Linux to its choice of
guest
physical address to use to program the device, the same way it does
on
native. The device would be fine as it always uses Linux-provided
<4GB
addresses. After the IOMMU translation (pagetable setup by Xen), we
could get any address, including >4GB addresses, and that is
expected to
work.

I understand that's the "normal" way of working. But whatever the
swiotlb
is used for in baremetal Linux, that would similarly require its use
in
PVH (or HVM) aiui. So unconditionally disabling it in PVH would look
to
me like an incomplete attempt to disable its use altogether on x86.
What
difference of PVH vs baremetal am I missing here?

swiotlb is not usable for GPUs even on bare metal.  They often have
hundreds of megs or even gigs of memory mapped on the device at any
given time.  Also, AMD GPUs support 44-48 bit DMA masks (depending on
the chip family).

But the swiotlb isn't per device, but system global.

Sure, but if the swiotlb is in use, then you can't really use the GPU.
So you get to pick one.

The swiotlb is used only for buffers which are not within the DMA mask of a
device (see dma_direct_map_page()). So an AMD GPU supporting a 44 bit DMA mask
won't use the swiotlb unless you have a buffer above guest physical address of
16TB (so basically never).

Disabling swiotlb in such a guest would OTOH mean, that a device with only
32 bit DMA mask passed through to this guest couldn't work with buffers
above 4GB.

I don't think this is acceptable.

 From the Xen subsystem in Linux point of view, the only thing we need to
do is to make sure *not* to enable swiotlb_xen (yes "swiotlb_xen", not
the global swiotlb) on PVH because it is not needed anyway.

I think we should leave the global "swiotlb" setting alone. The global
swiotlb is not relevant to Xen anyway, and surely baremetal Linux has to
have a way to deal with swiotlb/GPU incompatibilities.

We just have to avoid making things worse on Xen, and for that we just
need to avoid unconditionally enabling swiotlb-xen. If the Xen subsystem
doesn't enable swiotlb_xen/swiotlb, and no other subsystem enables
swiotlb, then we have a good Linux configuration capable of handling the
GPU properly.

Alex, please correct me if I am wrong. How is x86_swiotlb_enable set to
false on native (non-Xen) x86?

In most cases we have an IOMMU enabled and IIRC, TTM has slightly
different behavior for memory allocation depending on whether swiotlb
would be needed or not.


Well "slightly different" is an understatement. We need to disable quite 
a bunch of features to make swiotlb work with GPUs.


Especially userptr and inter device sharing won't work any more.

Regards,
Christian.



Alex




Re: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-21 Thread Christian König

Am 17.03.23 um 21:04 schrieb Sider, Graham:

[AMD Official Use Only - General]




-Original Message-
From: Russell, Kent 
Sent: Friday, March 17, 2023 3:58 PM
To: Mahfooz, Hamza ; Sider, Graham
; amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix 
Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode

[AMD Official Use Only - General]




-Original Message-
From: amd-gfx  On Behalf Of
Hamza Mahfooz
Sent: Friday, March 17, 2023 3:58 PM
To: Sider, Graham ;
amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix 
Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode


On 3/17/23 15:47, Graham Sider wrote:

Add log to display whether RAM is direct vs DMA mapped.

Signed-off-by: Graham Sider 

If this information is only useful for debugging purposes, please use
drm_dbg() instead of pr_info().

It's useful for more than just debug I would say. Just a quick way to grep 
whether IOMMU is off/pt vs device isolation mode.


Mhm, shouldn't the IOMMU code note that as well?

Christian.



Graham


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
   1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 8bba5e6872a1..8797a9523244 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3528,8 +3528,12 @@ static void

amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)

struct iommu_domain *domain;

domain = iommu_get_domain_for_dev(adev->dev);
-   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
+   pr_info("RAM is direct mapped to GPU (not traslated by

traslated -> translated


Thanks, my keyboard keeps skipping the on the 'n' key lately :( time for a 
clean.

Graham


  Kent

IOMMU)\n");

adev->ram_is_direct_mapped = true;
+   } else {
+   pr_info("RAM is DMA mapped to GPU (translated by

IOMMU)\n");

+   }
   }

   static const struct attribute *amdgpu_dev_attributes[] = {

--
Hamza




Re: BUG: KASAN: slab-use-after-free in drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]

2023-03-21 Thread Christian König

Hi Mikhail,

Am 20.03.23 um 13:05 schrieb Mikhail Gavrilov:

Hi,
after enabling KASAN literally I was bombarded with messages about
slab-use-after-free in drm_sched_get_cleanup_job.


mhm, interesting.


All messages have a similar backtrace:
[ 1138.492091] 
==
[ 1138.492104] BUG: KASAN: slab-use-after-free in
drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]
[ 1138.492120] Read of size 8 at addr 88815adf04c0 by task sdma1/749

[ 1138.492130] CPU: 29 PID: 749 Comm: sdma1 Tainted: GWL
  ---  ---  6.3.0-0.rc2.20230317git38e04b3e4240.27.fc39.x86_64+debug
#1
[ 1138.492136] Hardware name: System manufacturer System Product
Name/ROG STRIX X570-I GAMING, BIOS 4601 02/02/2023
[ 1138.492141] Call Trace:
[ 1138.492145]  
[ 1138.492150]  dump_stack_lvl+0x72/0xc0
[ 1138.492159]  print_report+0xcf/0x670
[ 1138.492169]  ? drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]
[ 1138.492181]  ? drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]
[ 1138.492193]  kasan_report+0xa4/0xe0
[ 1138.492200]  ? drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]
[ 1138.492215]  drm_sched_get_cleanup_job+0x47b/0x5c0 [gpu_sched]
[ 1138.492229]  drm_sched_main+0x643/0x990 [gpu_sched]
[ 1138.492245]  ? __pfx_drm_sched_main+0x10/0x10 [gpu_sched]
[ 1138.492259]  ? __pfx_autoremove_wake_function+0x10/0x10
[ 1138.492269]  ? __kthread_parkme+0xc1/0x1f0
[ 1138.492277]  ? __pfx_drm_sched_main+0x10/0x10 [gpu_sched]
[ 1138.492288]  kthread+0x29e/0x340
[ 1138.492294]  ? __pfx_kthread+0x10/0x10
[ 1138.492301]  ret_from_fork+0x2c/0x50
[ 1138.492314]  

[ 1138.492320] Allocated by task 10867:
[ 1138.492323]  kasan_save_stack+0x2f/0x50
[ 1138.492329]  kasan_set_track+0x21/0x30
[ 1138.492334]  __kasan_kmalloc+0x8b/0x90
[ 1138.492339]  amdgpu_driver_open_kms+0x10b/0x5a0 [amdgpu]
[ 1138.493112]  drm_file_alloc+0x46e/0x880
[ 1138.493120]  drm_open_helper+0x161/0x460
[ 1138.493126]  drm_open+0x1e7/0x5c0
[ 1138.493131]  drm_stub_open+0x24d/0x400
[ 1138.493138]  chrdev_open+0x215/0x620
[ 1138.493144]  do_dentry_open+0x5f1/0x1000
[ 1138.493149]  path_openat+0x1b3d/0x28a0
[ 1138.493156]  do_filp_open+0x1bd/0x400
[ 1138.493161]  do_sys_openat2+0x140/0x420
[ 1138.493167]  __x64_sys_openat+0x11f/0x1d0
[ 1138.493173]  do_syscall_64+0x5b/0x80
[ 1138.493179]  entry_SYSCALL_64_after_hwframe+0x72/0xdc

[ 1138.493189] Freed by task 10867:
[ 1138.493193]  kasan_save_stack+0x2f/0x50
[ 1138.493199]  kasan_set_track+0x21/0x30
[ 1138.493205]  kasan_save_free_info+0x2a/0x50
[ 1138.493210]  __kasan_slab_free+0x107/0x1a0
[ 1138.493216]  slab_free_freelist_hook+0x11e/0x1d0
[ 1138.493221]  __kmem_cache_free+0xbc/0x2e0
[ 1138.493227]  amdgpu_driver_postclose_kms+0x582/0x8d0 [amdgpu]
[ 1138.493963]  drm_file_free.part.0+0x638/0xb70
[ 1138.493969]  drm_release+0x1ea/0x470
[ 1138.493975]  __fput+0x213/0x9e0
[ 1138.493981]  task_work_run+0x11b/0x200
[ 1138.493987]  exit_to_user_mode_prepare+0x23a/0x260
[ 1138.493994]  syscall_exit_to_user_mode+0x16/0x50
[ 1138.494001]  do_syscall_64+0x67/0x80
[ 1138.494006]  entry_SYSCALL_64_after_hwframe+0x72/0xdc

[ 1138.494015] The buggy address belongs to the object at 88815adf
 which belongs to the cache kmalloc-4k of size 4096
[ 1138.494021] The buggy address is located 1216 bytes inside of
 freed 4096-byte region [88815adf, 88815adf1000)

[ 1138.494030] The buggy address belongs to the physical page:
[ 1138.494034] page:17cd5a82 refcount:1 mapcount:0
mapping: index:0x0 pfn:0x15adf0
[ 1138.494041] head:17cd5a82 order:3 entire_mapcount:0
nr_pages_mapped:0 pincount:0
[ 1138.494046] flags:
0x17c0010200(slab|head|node=0|zone=2|lastcpupid=0x1f)
[ 1138.494055] raw: 0017c0010200 88810004d040 dead0122

[ 1138.494061] raw:  00040004 0001

[ 1138.494065] page dumped because: kasan: bad access detected

[ 1138.494071] Memory state around the buggy address:
[ 1138.494076]  88815adf0380: fb fb fb fb fb fb fb fb fb fb fb fb
fb fb fb fb
[ 1138.494080]  88815adf0400: fb fb fb fb fb fb fb fb fb fb fb fb
fb fb fb fb
[ 1138.494084] >88815adf0480: fb fb fb fb fb fb fb fb fb fb fb fb
fb fb fb fb
[ 1138.494088]^
[ 1138.494092]  88815adf0500: fb fb fb fb fb fb fb fb fb fb fb fb
fb fb fb fb
[ 1138.494095]  88815adf0580: fb fb fb fb fb fb fb fb fb fb fb fb
fb fb fb fb
[ 1138.494099] 
==
[ 1138.494140] Disabling lock debugging due to kernel taint

First, I checked the RAM to be sure that it is definitely not a
hardware problem. I started investigating which application triggered
this message. I started to notice that it usually happens when any
game starts in the steam client. So I concluded that the culprit of the
problem is the compilation of shaders. For reproduction we should have
a kernel with 

[linux-next:master] BUILD REGRESSION f3594f0204b756638267242e26d9de611435c3ba

2023-03-21 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: f3594f0204b756638267242e26d9de611435c3ba  Add linux-next specific 
files for 20230321

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202303082135.njdx1bij-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303161521.jbgbafjj-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303190142.tjyypbba-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303211332.milzgukq-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303212204.3g5mratj-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

Warning: MAINTAINERS references a file that doesn't exist: 
Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
Warning: MAINTAINERS references a file that doesn't exist: 
Documentation/ABI/obsolete/sysfs-selinux-disable
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu13/smu_v13_0_6_ppt.c:309:17: sparse:  
  int
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu13/smu_v13_0_6_ppt.c:309:17: sparse:  
  void
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:411:11: error: call to undeclared function 
'devm_drm_of_get_bridge'; ISO C99 and later do not support implicit function 
declarations [-Wimplicit-function-declaration]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:411:18: error: implicit declaration of 
function 'devm_drm_of_get_bridge' [-Werror=implicit-function-declaration]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:411:9: error: incompatible integer to 
pointer conversion assigning to 'struct drm_bridge *' from 'int' 
[-Wint-conversion]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:15: error: implicit declaration of 
function 'drm_bridge_attach' [-Werror=implicit-function-declaration]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:61: error: use of undeclared identifier 
'DRM_BRIDGE_ATTACH_NO_CONNECTOR'
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:68: error: 
'DRM_BRIDGE_ATTACH_NO_CONNECTOR' undeclared (first use in this function)
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:8: error: call to undeclared function 
'drm_bridge_attach'; ISO C99 and later do not support implicit function 
declarations [-Wimplicit-function-declaration]
drivers/net/wireless/legacy/ray_cs.c:628:17: warning: 'strncpy' specified bound 
32 equals destination size [-Wstringop-truncation]
include/linux/compiler_types.h:338:27: error: expression in static assertion is 
not an integer
include/linux/container_of.h:20:54: error: invalid use of undefined type 
'struct module'
include/linux/rculist.h:392:21: error: invalid use of undefined type 'struct 
module'
include/linux/stddef.h:16:33: error: invalid use of undefined type 'struct 
module'
kernel/bpf/../module/internal.h:205:2: error: assigning to 'struct module *' 
from incompatible type 'void'
kernel/bpf/../module/internal.h:205:2: error: incomplete definition of type 
'struct module'
kernel/bpf/../module/internal.h:205:2: error: offsetof of incomplete type 
'typeof (*mod)' (aka 'struct module')
kernel/bpf/../module/internal.h:205:2: error: operand of type 'void' where 
arithmetic or pointer type is required

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/iommu/iommufd/selftest.c:295:21: sparse: sparse: symbol 
'mock_iommu_device' was not declared. Should it be static?
drivers/soc/fsl/qe/tsa.c:140:26: sparse: sparse: incorrect type in argument 2 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:150:27: sparse: sparse: incorrect type in argument 1 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:189:26: sparse: sparse: dereference of noderef 
expression
drivers/soc/fsl/qe/tsa.c:663:22: sparse: sparse: incorrect type in assignment 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:673:21: sparse: sparse: incorrect type in assignment 
(different address spaces)
drivers/watchdog/imx2_wdt.c:442:22: sparse: sparse: symbol 'imx_wdt' was not 
declared. Should it be static?
drivers/watchdog/imx2_wdt.c:446:22: sparse: sparse: symbol 'imx_wdt_legacy' was 
not declared. Should it be static?
io_uring/io_uring.c:432 io_prep_async_work() error: we previously assumed 
'req->file' could be null (see line 425)
io_uring/kbuf.c:221 __io_remove_buffers() warn: variable dereferenced before 
check 'bl->buf_ring' (see line 219)

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   `-- 
drivers-net-wireless-legacy-ray_cs.c:warning:strncpy-specified-bound-equals-destination-size
|-- arc-randconfig-r043-20230319
|   |-- 
include-linux-compiler_types.h:error:expression-in-static-assertion-is-not-an-integer
|   |-- 
include-linux-container_of.h:error:invalid-use-of-undefined-type-struct-module
|   |-- 
include-linux-rculist.h:error:invalid-use-of-undefined-type-struct-module
|   `-- include-linux-stddef.h:error:invalid-use-of-undefined-type-struct-module
|-- arm64-randconfig-r035-20230319
|   |-- 
include-linux-compiler_types.h:error:expression-in-static-assertion-is-not-an-integer
|   |-- 
include-linux-container_of.h:error:invalid-use-of-und

Re: [PATCH 02/11] drm/amdgpu/gfx11: check the CP FW version CP GFX shadow support

2023-03-21 Thread Christian König

Am 20.03.23 um 18:38 schrieb Alex Deucher:

Only set the supported flag if we have new enough CP FW.

XXX: don't commit this until the CP FW versions are finalized!

Signed-off-by: Alex Deucher 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a0d830dc0d01..4a50d0fbcdcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -469,6 +469,15 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct 
amdgpu_device *adev)
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+   /* XXX fix me! */
+   if ((adev->gfx.me_fw_version >= 1498) &&
+   (adev->gfx.me_feature_version >= 29) &&
+   (adev->gfx.pfp_fw_version >= 1541) &&
+   (adev->gfx.pfp_feature_version >= 29) &&
+   (adev->gfx.mec_fw_version >= 507) &&
+   (adev->gfx.mec_feature_version >= 29))
+   adev->gfx.cp_gfx_shadow = true;
+   break;
default:
adev->gfx.cp_gfx_shadow = false;
break;




RE: [PATCH 00/19] DC Patches Mar 20th, 2023

2023-03-21 Thread Wheeler, Daniel
[AMD Official Use Only - General]

Hi all,
 
This week this patchset was tested on the following systems:
 
Lenovo Thinkpad T14s Gen2, with AMD Ryzen 5 5650U 
Lenovo Thinkpad T13s Gen4 with AMD Ryzen 5 6600U
Reference AMD RX6800
 
These systems were tested on the following display types: 
eDP, (1080p 60hz [5650U]) (1920x1200 60hz [6600U]) (2560x1600 120hz[6600U])
VGA and DVI (1680x1050 60HZ [DP to VGA/DVI, USB-C to DVI/VGA])
DP/HDMI/USB-C (1440p 170hz, 4k 60hz, 4k 144hz [Includes USB-C to DP/HDMI 
adapters])
 
MST tested with Startech MST14DP123DP and 2x 4k 60Hz displays
DSC tested with Cable Matters 101075 (DP to 3x DP), and 201375 (USB-C to 3x DP) 
with 3x 4k60 displays
HP Hook G2 with 1 and 2 4k60 Displays
 
The testing is a mix of automated and manual tests. Manual testing includes 
(but is not limited to):
Changing display configurations and settings
Benchmark testing
Feature testing (Freesync, etc.)
 
Automated testing includes (but is not limited to):
Script testing (scripts to automate some of the manual checks)
IGT testing
 
The patchset consists of the amd-staging-drm-next branch with new patches added 
on top of it. This branch is used for both Ubuntu and Chrome OS testing 
(ChromeOS on a bi-weekly basis).
 
 
Tested on Ubuntu 22.04.1 and Chrome OS
 
Tested-by: Daniel Wheeler 
 
 
Thank you,
 
Dan Wheeler
Sr. Technologist | AMD
SW Display
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
amd.com

-Original Message-
From: Zhuo, Qingqing (Lillian)  
Sent: March 18, 2023 3:56 AM
To: amd-gfx@lists.freedesktop.org
Cc: Wentland, Harry ; Li, Sun peng (Leo) 
; Lakha, Bhawanpreet ; Siqueira, 
Rodrigo ; Pillai, Aurabindo 
; Zhuo, Qingqing (Lillian) ; 
Li, Roman ; Lin, Wayne ; Wang, Chao-kai 
(Stylon) ; Chiu, Solomon ; Kotarac, 
Pavle ; Gutierrez, Agustin ; 
Wheeler, Daniel 
Subject: [PATCH 00/19] DC Patches Mar 20th, 2023

This DC patchset brings improvements in multiple areas. In summary, we 
highlight:
- Power down eDP if eDP not present
- Set MPC_SPLIT_DYNAMIC for DCN10 and DCN301
- Initialize link_srv in virtual env
- Code cleanup and alignment

Cc: Daniel Wheeler 
---

Alex Deucher (2):
  Revert "drm/amdgpu/display: change pipe policy for DCN 2.0"
  drm/amdgpu/smu11: enable TEMP_DEPENDENT_VMIN for navi1x

Alex Hung (1):
  drm/amd/display: remove outdated 8bpc comments

Aric Cyr (1):
  drm/amd/display: 3.2.228

Charlene Liu (3):
  drm/amd/display: update dio for two pixel per container case
  drm/amd/display: Add CRC and DMUB test support
  drm/amd/display: add missing code change init pix_per_cycle

Hersen Wu (3):
  drm/amd/display: align commit_planes_for_stream to latest dc code
  drm/amd/display: fix wrong index used in dccg32_set_dpstreamclk
  drm/amd/display: Set dcn32 caps.seamless_odm

Martin Leung (1):
  drm/amd/display: initialize link_srv in virtual env

Mustapha Ghaddar (1):
  drm/amd/display: Add function pointer for validate bw usb4

Paul Hsieh (1):
  drm/amd/display: power down eDP if eDP not present

Rodrigo Siqueira (3):
  drm/amd/display: Add const to a function
  drm/amd/display: Set MPC_SPLIT_DYNAMIC for DCN10
  drm/amd/display: Set MPC_SPLIT_DYNAMIC for DCN301

Saaem Rizvi (1):
  drm/amd/display: Implement workaround for writing to
OTG_PIXEL_RATE_DIV register

Wesley Chalmers (1):
  drm/amd/display: Make DCN32 3dlut function available to future DCNs

Zhikai Zhai (1):
  drm/amd/display: skip wait vblank

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 -
 drivers/gpu/drm/amd/display/dc/core/dc.c  | 27 +++--
 .../drm/amd/display/dc/core/dc_link_exports.c |  8 +++--
 drivers/gpu/drm/amd/display/dc/dc.h   | 18 ++--
 .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 29 +++  
.../drm/amd/display/dc/dcn10/dcn10_resource.c |  4 +--  
.../gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h |  3 +-
 .../drm/amd/display/dc/dcn20/dcn20_hwseq.c|  4 +--
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  2 +-
 .../amd/display/dc/dcn301/dcn301_resource.c   |  2 +-
 .../gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 25 ++--  
.../gpu/drm/amd/display/dc/dcn32/dcn32_dccg.h |  3 +-
 .../dc/dcn32/dcn32_dio_stream_encoder.c   |  6 
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c|  4 +--
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.h|  3 ++
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  1 +  
.../drm/amd/display/dc/dcn32/dcn32_resource.h |  3 +-
 drivers/gpu/drm/amd/display/dc/inc/link.h |  3 ++
 .../drm/amd/display/dc/link/link_detection.c  |  5 
 .../drm/amd/display/dc/link/link_factory.c|  1 +
 .../drm/amd/display/dc/link/link_validation.c | 18   
.../drm/amd/display/dc/link/link_validation.h |  4 +++
 .../dc/link/protocols/link_dp_dpia_bw.c   |  9 ++
 .../dc/link/protocols/link_dp_dpia_bw.h   |  7 -
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |  2 ++
 

Re: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

2023-03-21 Thread Deucher, Alexander
[Public]

Reviewed-by: Alex Deucher 

From: Tong Liu01 
Sent: Tuesday, March 21, 2023 5:19 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Quan, Evan ; Chen, Horace ; Tuikov, 
Luben ; Koenig, Christian ; 
Deucher, Alexander ; Xiao, Jack ; 
Zhang, Hawking ; Liu01, Tong (Esther) 

Subject: [PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

[why]
For Navi12 and CHIP_SIENNA_CICHLID SRIOV, TMR is not loaded. Should
also skip tmr unload

Signed-off-by: Tong Liu01 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b9e99c35a05..69addf2751aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -841,6 +841,12 @@ static int psp_tmr_unload(struct psp_context *psp)
 int ret;
 struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);

+   /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+* Already set up by host driver.
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
 psp_prep_tmr_unload_cmd_buf(psp, cmd);
 dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");

--
2.34.1



Re: [PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2023-03-21 Thread Christian König

Yes, I would like to avoid having multiple code paths for context creation.

Setting it later on should be equivalent to specifying it on creation since 
we only need it during CS.


Regards,
Christian.

Am 21.03.23 um 14:00 schrieb Sharma, Shashank:


[AMD Official Use Only - General]

When we started this patch series, the workload hint was a part of the 
ctx_flag only,


But we changed that after the design review, to make it more like how 
we are handling PSTATE.


Details:

https://patchwork.freedesktop.org/patch/496111/

Regards

Shashank

*From:*Marek Olšák 
*Sent:* 21 March 2023 04:05
*To:* Sharma, Shashank 
*Cc:* amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Somalapuram, Amaranath 
; Koenig, Christian 

*Subject:* Re: [PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints 
to ctx ioctl


I think we should do it differently because this interface will be 
mostly unused by open source userspace in its current form.


Let's set the workload hint in drm_amdgpu_ctx_in::flags, and that will 
be immutable for the lifetime of the context. No other interface is 
needed.


Marek

On Mon, Sep 26, 2022 at 5:41 PM Shashank Sharma 
 wrote:


Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

V3: Create only set() workload UAPI (Christian)

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 include/uapi/drm/amdgpu_drm.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h
index c2c9c674a223..23d354242699 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,7 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_OP_QUERY_STATE2     4
 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE        5
 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE        6
+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE     7

 /* GPU reset status */
 #define AMDGPU_CTX_NO_RESET            0
@@ -252,6 +253,17 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
 #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4

+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT     8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK      (0xff <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE      (0 <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D        (1 <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO     (2 <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR        (3 <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 <<
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)      (n >>
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+
 struct drm_amdgpu_ctx_in {
        /** AMDGPU_CTX_OP_* */
        __u32   op;
@@ -281,6 +293,11 @@ union drm_amdgpu_ctx_out {
                        __u32   flags;
                        __u32   _pad;
                } pstate;
+
+               struct {
+                       __u32   flags;
+                       __u32   _pad;
+               } workload;
 };

 union drm_amdgpu_ctx {
-- 
2.34.1




[PATCH] tests/amdgpu: Allow to exclude a test or a suite of tests

2023-03-21 Thread Luben Tuikov
Add the command line argument -e s[.t] to exclude (disable) suite s, or to
exclude suite s test t.

This is useful for instance to run the Basic Suite, but disable the GPU reset
test, on the command line, like this:

amdgpu_tests -s 1 -e 1.13

This option can be specified more than once on the command line, in order to
exclude more than one suite and/or suite and test combination from being run.

Cc: Alex Deucher 
Signed-off-by: Luben Tuikov 
---
 tests/amdgpu/amdgpu_test.c | 187 ++---
 1 file changed, 152 insertions(+), 35 deletions(-)

diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index 59ca49bdef5f20..ec787889afd25f 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -296,11 +296,14 @@ static void display_test_suites(void)
 
 /** Help string for command line parameters */
 static const char usage[] =
-   "Usage: %s [-hlpr] [-s ] [-t ] [-f] "
+   "Usage: %s [-hlpr] [-s ] [-e [.] [-e ...]] [-t ] [-f] "
"[-b ] [-d ]\n"
"Where,\n"
"  -b  Specify device's PCI bus id to run tests\n"
"  -d  Specify device's PCI device id to run tests (optional)\n"
+   "  -e <s>[.<t>]  Disable test <t> of suite <s>. If only <s> is given, 
then disable\n"
+   "  the whole suite. Can be specified more than once on the 
command line\n"
+   "  to disable multiple tests or suites.\n"
"  -f  Force executing inactive suite or test\n"
"  -h  Display this help\n"
"  -l  Display all test suites and their tests\n"
@@ -309,7 +312,7 @@ static const char usage[] =
"  -s   Enable only test suite \n"
"  -t   Enable only test  of test suite \n";
 /** Specified options strings for getopt */
-static const char options[]   = "hlrps:t:b:d:f";
+static const char options[]   = "hlrps:t:e:b:d:f";
 
 /* Open AMD devices.
  * Return the number of AMD device opened.
@@ -664,6 +667,48 @@ char *amdgpu_get_device_from_fd(int fd)
 #endif
 }
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(_A) (sizeof(_A)/sizeof(_A[0]))
+#endif
+
+static void amdgpu_test_disable(long suite, long test)
+{
+   const char *suite_name;
+
+   if (suite < 1)
+   return;
+
+   /* The array is 0-based, so subtract 1. */
+   suite--;
+   if (suite >= ARRAY_SIZE(suites) - 1)
+   return;
+
+   suite_name = suites[suite].pName;
+   if (test < 1) {
+   fprintf(stderr, "Deactivating suite %s\n", suite_name);
+   amdgpu_set_suite_active(suite_name, CU_FALSE);
+   } else {
+   int ii;
+
+   /* The array is 0-based so subtract 1. */
+   test--;
+   for (ii = 0; suites[suite].pTests[ii].pName; ii++) {
+   if (ii == test) {
+   fprintf(stderr, "Deactivating %s:%s\n",
+   suite_name,
+   suites[suite].pTests[ii].pName);
+   amdgpu_set_test_active(suite_name,
+  
suites[suite].pTests[ii].pName,
+  CU_FALSE);
+   break;
+   }
+   }
+
+   if (suites[suite].pTests[ii].pName == NULL)
+   fprintf(stderr, "No such suite.test %ld.%ld\n", suite, 
test);
+   }
+}
+
 /* The main() function for setting up and running the tests.
  * Returns a CUE_SUCCESS on successful running, another
  * CUnit error code on failure.
@@ -682,48 +727,21 @@ int main(int argc, char **argv)
int display_list = 0;
int force_run = 0;
 
-   for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
-   drm_amdgpu[i] = -1;
-
-
-   /* Parse command line string */
+   /* Parse command line string.
+* Process various command line options as early as possible.
+*/
opterr = 0; /* Do not print error messages from getopt */
while ((c = getopt(argc, argv, options)) != -1) {
switch (c) {
-   case 'l':
-   display_list = 1;
-   break;
-   case 's':
-   suite_id = atoi(optarg);
-   break;
-   case 't':
-   test_id = atoi(optarg);
-   break;
-   case 'b':
-   pci_bus_id = atoi(optarg);
-   break;
-   case 'd':
-   sscanf(optarg, "%x", &pci_device_id);
-   break;
-   case 'p':
-   display_devices = 1;
-   break;
-   case 'r':
-   open_render_node = 1;
-   break;
-   case 'f':
-   force_run = 1;
-   break;
-   

Re: [PATCH v2 00/21] Enable Colorspace connector property in amdgpu

2023-03-21 Thread Sebastian Wick
FWIW, I still think this series is good (minus the UAPI changes) and
would allow us to work on user space HDR support without custom
kernels.

On Fri, Jan 13, 2023 at 5:24 PM Harry Wentland  wrote:
>
> This patchset enables the DP and HDMI infoframe properties
> in amdgpu.
>
> The first two patches are not completely related to the rest. The
> first patch allows for HDR_OUTPUT_METADATA with EOTFs that are
> unknown in the kernel.
>
> The second one prints a connector's max_bpc as part of the atomic
> state debugfs print.
>
> The following patches rework the connector colorspace code to
> 1) allow for easy printing of the colorspace in the drm_atomic
>state debugfs, and
> 2) allow drivers to specify the supported colorspaces on a
>connector.
>
> The rest of the patches deal with the Colorspace enablement
> in amdgpu.
>
> Why do drivers need to specify supported colorspaces? The amdgpu
> driver needs support for RGB-to-YCbCr conversion when we drive
> the display in YCbCr. This is currently not implemented for all
> colorspaces.
>
> Since the Colorspace property didn't have an IGT test I added
> one to kms_hdr. The relevant patchset can be found on the IGT
> mailing list or on
> https://gitlab.freedesktop.org/hwentland/igt-gpu-tools/-/tree/hdr-colorimetry
>
> We tested v1 of the patchset and confirmed that the infoframes
> are as expected for both DP and HDMI when running the IGT
> colorimetry tests.
>
> Open Items
> --
>
> A couple comments from Pekka about colorspace documentation are
> left unaddressed. I hope they won't block merging this set but
> should still be addressed separately.
>
> Pekka's questions really got me thinking of how this colorspace
> property should be used and working with it more closely with
> Joshua who is enabling HDR in gamescope made me wonder even more.
>
> Uma, is there a (canonical, upstream) userspace that uses this
> property that I can look at to understand more?
>
> One of the key challenges that is currently not addressed is that
> userspace is expected to pick a colorspace format straight from the
> list of definitions out of the DP or HDMI spec. But the kernel
> drivers are the ones deciding on the output encoding (RGB, YCBCR444,
> YCBCR420, etc.). So there is no way for userspace to decide correctly
> between, for example, BT2020_RGB, BT2020_CYCC, BT2020_YCC.
>
> So we end up in a scenario where gamescope sets BT2020_RGB but we
> output YCBCR444 so have to correct the colorspace value to
> BT2020_YCC. This in turn breaks the colorspace IGT tests I
> wrote. I don't think "fixing" the IGT tests to accept this is
> the right thing to do.
>
> The way it stands this patchset allows us to specify the output
> colorspace on amdgpu and we try to do the right thing, but I don't
> think the way the colorspace property is defined is right. We're trying
> to expose things to userspace that should be under driver control. A
> much better approach would be to give userspace options for colorspace
> that are not tied to DP or HDMI specs, i.e., sRGB, BT709, BT2020, etc.,
> and have the driver do the right thing to fill the infoframe, e.g., by
> picking BT2020_YCC if the requested colorspace is BT2020 and the
> output encoding is YCBCR444.
>
> If no upstream userspace currently makes use of this property I
> can make that change, i.e., no longer tie the colorspace property
> directly to the infoframe and reduce the options to sRGB, BT709,
> BT601, and BT2020 (and possibly opRGB).
>
> v2:
> - Tested with DP and HDMI analyzers
> - Confirmed driver will fallback to lower bpc when needed
> - Dropped hunk to set HDMI AVI infoframe as it was a no-op
> - Fixed BT.2020 YCbCr colorimetry (JoshuaAshton)
> - Simplify initialization of supported colorspaces (Jani)
> - Fix kerneldoc (kernel test robot)
>
> Cc: Pekka Paalanen 
> Cc: Sebastian Wick 
> Cc: vitaly.pros...@amd.com
> Cc: Uma Shankar 
> Cc: Ville Syrjälä 
> Cc: Joshua Ashton 
> Cc: Jani Nikula 
> Cc: Michel Dänzer 
> Cc: dri-de...@lists.freedesktop.org
> Cc: amd-gfx@lists.freedesktop.org
>
> Harry Wentland (16):
>   drm/display: Don't block HDR_OUTPUT_METADATA on unknown EOTF
>   drm/connector: print max_requested_bpc in state debugfs
>   drm/connector: Drop COLORIMETRY_NO_DATA
>   drm/connector: Convert DRM_MODE_COLORIMETRY to enum
>   drm/connector: Pull out common create_colorspace_property code
>   drm/connector: Allow drivers to pass list of supported colorspaces
>   drm/connector: Print connector colorspace in state debugfs
>   drm/amd/display: Always pass connector_state to stream validation
>   drm/amd/display: Register Colorspace property for DP and HDMI
>   drm/amd/display: Signal mode_changed if colorspace changed
>   drm/amd/display: Send correct DP colorspace infopacket
>   drm/amd/display: Add support for explicit BT601_YCC
>   drm/amd/display: Add debugfs for testing output colorspace
>   drm/amd/display: Add default case for output_color_space switch
>   drm/amd/display: Don't restrict bpc to 8 bpc
>   drm/amd/display: 

RE: [PATCH 3/3] drm/amdgpu: resume ras for gfx v11_0_3 during reset on SRIOV

2023-03-21 Thread Yang, Stanley
[AMD Official Use Only - General]

The series is Reviewed-by: Stanley Yang 

Regards,
Stanley
> -Original Message-
> From: amd-gfx  On Behalf Of
> YiPeng Chai
> Sent: Tuesday, March 21, 2023 10:40 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhou1, Tao ; Zhang, Hawking
> ; Chai, Thomas ; Chai,
> Thomas 
> Subject: [PATCH 3/3] drm/amdgpu: resume ras for gfx v11_0_3 during reset
> on SRIOV
> 
> Gfx v11_0_3 supports ras on SRIOV, so need to resume ras during reset.
> 
> Signed-off-by: YiPeng Chai 
> Reviewed-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d74d05802566..14d756caf839 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5313,8 +5313,9 @@ int amdgpu_device_gpu_recover(struct
> amdgpu_device *adev,
>   if (r)
>   adev->asic_reset_res = r;
> 
> - /* Aldebaran supports ras in SRIOV, so need resume ras
> during reset */
> - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
> + /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need
> resume ras during reset */
> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)
> ||
> + adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
>   amdgpu_ras_resume(adev);
>   } else {
>   r = amdgpu_do_asic_reset(device_list_handle,
> reset_context);
> --
> 2.34.1


RE: [PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2023-03-21 Thread Sharma, Shashank
[AMD Official Use Only - General]

When we started this patch series, the workload hint was a part of the ctx_flag 
only,
But we changed that after the design review, to make it more like how we are 
handling PSTATE.

Details:
https://patchwork.freedesktop.org/patch/496111/

Regards
Shashank

From: Marek Olšák 
Sent: 21 March 2023 04:05
To: Sharma, Shashank 
Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Somalapuram, Amaranath 
; Koenig, Christian 
Subject: Re: [PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

I think we should do it differently because this interface will be mostly 
unused by open source userspace in its current form.

Let's set the workload hint in drm_amdgpu_ctx_in::flags, and that will be 
immutable for the lifetime of the context. No other interface is needed.

Marek

On Mon, Sep 26, 2022 at 5:41 PM Shashank Sharma 
<shashank.sha...@amd.com> wrote:
Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

V3: Create only set() workload UAPI (Christian)

Signed-off-by: Alex Deucher 
<alexander.deuc...@amd.com>
Signed-off-by: Shashank Sharma 
<shashank.sha...@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c2c9c674a223..23d354242699 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,7 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_OP_QUERY_STATE2 4
 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE5
 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE6
+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 7

 /* GPU reset status */
 #define AMDGPU_CTX_NO_RESET0
@@ -252,6 +253,17 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
 #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4

+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX  AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)  (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+
 struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
__u32   op;
@@ -281,6 +293,11 @@ union drm_amdgpu_ctx_out {
__u32   flags;
__u32   _pad;
} pstate;
+
+   struct {
+   __u32   flags;
+   __u32   _pad;
+   } workload;
 };

 union drm_amdgpu_ctx {
--
2.34.1


Re: [PATCH 00/10] drm/radeon: Convert fbdev to DRM client

2023-03-21 Thread Thomas Zimmermann

Hi

Am 20.03.23 um 16:23 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:19 AM Thomas Zimmermann  wrote:


Hi

Am 20.03.23 um 16:11 schrieb Christian König:

Am 17.03.23 um 10:20 schrieb Thomas Zimmermann:

Hi Christian

Am 17.03.23 um 09:53 schrieb Christian König:

Am 16.03.23 um 10:37 schrieb Thomas Zimmermann:

Convert radeon's fbdev code to drm_client. Replaces the current
ad-hoc integration. The conversion includes a number of cleanups.
Only build fbdev support if the config option has been set.


I'm torn apart on that. On the one hand it looks like a really nice
cleanup on the other hand we don't really want to touch radeon any more.


It's a driver in the upstream kernel. You have to expect at least some
changes.


Some changes is not the problem, but we need a justification to change
something. Just that it's nice to have won't do it without extensive
testing.





Alex what do you think? Is that worth the risk of breaking stuff?


Moving all fbdev emulation to struct drm_client is required for new
in-kernel DRM clients, such as a DRM kernel logger or a boot splash.


Well that's a rather good justification. I suggest to add that to the
cover-letter.


Ok, will go into a possible v2. The mid-term plan is to convert the
fbdev code in all remaining drivers to struct drm_client and remove the
old ad-hoc callbacks.

With struct drm_client, we can select in-kernel clients at compile time
or runtime just like userspace clients. I guess, we can have a bootup
screen and then switch to the console or the DRM logger. Or go from any
client to the logger on kernel panics (or something like that). There's
been occasional talk about userspace consoles, which would use such
functionality.


Patches look good to me.  I have a pretty limited set of HW I can test
on since I don't have a functional AGP system anymore and most of my
older PCIe radeons are packed up in the attic.  Feel free to add my:


I've tested the patches with an R5-based card.


Reviewed-by: Alex Deucher 
to the series.


Thank you so much. Do you want to take the patches into the amd tree?

Best regards
Thomas



Alex



Best regards
Thomas



Regards,
Christian.



Best regards
Thomas



Christian.



Thomas Zimmermann (10):
drm/radeon: Move radeon_align_pitch() next to dumb-buffer helpers
drm/radeon: Improve fbdev object-test helper
drm/radeon: Remove struct radeon_fbdev
drm/radeon: Remove test for !screen_base in fbdev probing
drm/radeon: Move fbdev object helpers before struct fb_ops et al
drm/radeon: Fix coding style in fbdev emulation
drm/radeon: Move fbdev cleanup code into fb_destroy callback
drm/radeon: Correctly clean up failed display probing
drm/radeon: Implement client-based fbdev emulation
drm/radeon: Only build fbdev if DRM_FBDEV_EMULATION is set

   drivers/gpu/drm/radeon/Makefile |   3 +-
   drivers/gpu/drm/radeon/radeon.h |   2 +
   drivers/gpu/drm/radeon/radeon_display.c |   4 -
   drivers/gpu/drm/radeon/radeon_drv.c |   3 +-
   drivers/gpu/drm/radeon/radeon_drv.h |   1 -
   drivers/gpu/drm/radeon/radeon_fb.c  | 400 --
   drivers/gpu/drm/radeon/radeon_fbdev.c   | 422

   drivers/gpu/drm/radeon/radeon_gem.c |  24 ++
   drivers/gpu/drm/radeon/radeon_kms.c |  18 -
   drivers/gpu/drm/radeon/radeon_mode.h|  20 +-
   10 files changed, 464 insertions(+), 433 deletions(-)
   delete mode 100644 drivers/gpu/drm/radeon/radeon_fb.c
   create mode 100644 drivers/gpu/drm/radeon/radeon_fbdev.c


base-commit: ec0708e846b819c8d5b642de42448a87d7526564








--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature


[PATCH] drm/amdgpu: skip unload tmr when tmr is not loaded

2023-03-21 Thread Tong Liu01
[why]
For Navi12 and CHIP_SIENNA_CICHLID SRIOV, TMR is not loaded. Should
also skip tmr unload

Signed-off-by: Tong Liu01 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b9e99c35a05..69addf2751aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -841,6 +841,12 @@ static int psp_tmr_unload(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
 
+   /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+* Already set up by host driver.
+*/
+   if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+   return 0;
+
psp_prep_tmr_unload_cmd_buf(psp, cmd);
dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
 
-- 
2.34.1