Re: [PATCH] drm/amdgpu: move task_info to amdgpu_fpriv

2023-10-16 Thread Christian König

Am 17.10.23 um 00:15 schrieb Felix Kuehling:

On 2023-10-16 13:08, Shashank Sharma wrote:

This patch does the following:
- moves vm->task_info struct to fpriv->task_info.
- makes task_info allocation dynamic.
- adds reference counting support for task_info structure.
- adds some new helper functions to find and put task_info.
- adds respective supporting changes for existing get_task_info 
consumers.


Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 28 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 87 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  9 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  5 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 19 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 18 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 17 ++--
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 19 +++--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 19 ++---
  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c    | 20 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 14 ++--
  14 files changed, 186 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index dc2d53081e80..a90780d38725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -469,6 +469,8 @@ struct amdgpu_fpriv {
  struct mutex    bo_list_lock;
  struct idr    bo_list_handles;
  struct amdgpu_ctx_mgr    ctx_mgr;
+    struct amdgpu_task_info *task_info;
+
  /** GPU partition selection */
  uint32_t    xcp_id;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index a4faea4fa0b5..6e9dcd13ee34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1763,9 +1763,11 @@ static int amdgpu_debugfs_vm_info_show(struct 
seq_file *m, void *unused)

  list_for_each_entry(file, &dev->filelist, lhead) {
  struct amdgpu_fpriv *fpriv = file->driver_priv;
  struct amdgpu_vm *vm = &fpriv->vm;
+    struct amdgpu_task_info *task_info = fpriv->task_info;
    seq_printf(m, "pid:%d\tProcess:%s --\n",
-    vm->task_info.pid, vm->task_info.process_name);
+    task_info ? task_info->pid : 0,
+    task_info ? task_info->process_name : "");
  r = amdgpu_bo_reserve(vm->root.bo, true);
  if (r)
  break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 3f001a50b34a..b372a87b9b77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4872,6 +4872,27 @@ static void 
amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)

  dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
    amdgpu_devcoredump_read, amdgpu_devcoredump_free);
  }
+
+static void
+amdgpu_reset_dev_coredump(struct amdgpu_device *adev, struct 
amdgpu_reset_context *reset_context)

+{
+    struct amdgpu_task_info *ti;
+    struct amdgpu_vm *vm;
+
+    if (!reset_context->job || !reset_context->job->vm)
+    return;
+
+    vm = reset_context->job->vm;
+
+    /* Get reset task info and save a copy of data to be consumed 
later */

+    ti = amdgpu_vm_get_task_info(adev, vm->pasid);
+    if (ti) {
+    adev->reset_task_info = *ti;
+    amdgpu_reset_capture_coredumpm(adev);
+    }
+
+    amdgpu_vm_put_task_info(adev, vm->pasid);
+}
  #endif
    int amdgpu_do_asic_reset(struct list_head *device_list_handle,
@@ -4976,12 +4997,7 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,

  vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
  #ifdef CONFIG_DEV_COREDUMP
  tmp_adev->reset_vram_lost = vram_lost;
-    memset(&tmp_adev->reset_task_info, 0,
-    sizeof(tmp_adev->reset_task_info));
-    if (reset_context->job && reset_context->job->vm)
-    tmp_adev->reset_task_info =
- reset_context->job->vm->task_info;
-    amdgpu_reset_capture_coredumpm(tmp_adev);
+    amdgpu_reset_dev_coredump(tmp_adev, reset_context);
  #endif
  if (vram_lost) {
  DRM_INFO("VRAM is lost due to GPU reset!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 78476bc75b4e..99cf30c0bce6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat 
amdgpu_job_timedout(struct drm_sched_job *s_job)

  {
  struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sc

RE: [PATCH 4/5] drm/amdgpu: bypass RAS error reset in some conditions

2023-10-16 Thread Yang, Stanley
[AMD Official Use Only - General]

The in_gpu_reset flag is set after the reset-error-count and reset-error-status function
calls, so we can't use amdgpu_in_reset(); please check the ras->in_recovery flag instead.

Regards,
Stanley
From: Zhou1, Tao 
Sent: Friday, October 13, 2023 5:06 PM
To: Zhang, Hawking ; amd-gfx@lists.freedesktop.org; 
Yang, Stanley ; Li, Candice ; Chai, 
Thomas ; Wang, Yang(Kevin) 
Subject: Re: [PATCH 4/5] drm/amdgpu: bypass RAS error reset in some conditions


[AMD Official Use Only - General]

How about this condition:

if ((amdgpu_in_reset(adev) || amdgpu_ras_intr_triggered()) &&
   mca_funcs && mca_funcs->mca_set_debug_mode)

I use amdgpu_in_reset to skip touching it in all gpu resets, not only for the 
resets triggered by ras fatal error.

Regards,
Tao


From: Zhang, Hawking mailto:hawking.zh...@amd.com>>
Sent: Thursday, October 12, 2023 9:14 PM
To: Zhou1, Tao mailto:tao.zh...@amd.com>>; 
amd-gfx@lists.freedesktop.org 
mailto:amd-gfx@lists.freedesktop.org>>; Yang, 
Stanley mailto:stanley.y...@amd.com>>; Li, Candice 
mailto:candice...@amd.com>>; Chai, Thomas 
mailto:yipeng.c...@amd.com>>; Wang, Yang(Kevin) 
mailto:kevinyang.w...@amd.com>>
Subject: RE: [PATCH 4/5] drm/amdgpu: bypass RAS error reset in some conditions

[AMD Official Use Only - General]

-   if (!amdgpu_ras_is_supported(adev, block))
+   /* skip ras error reset in gpu reset */
+   if (amdgpu_in_reset(adev) &&
+   mca_funcs && mca_funcs->mca_set_debug_mode)
+   return 0;

We should check RAS in_recovery flag in such case. Reset domain is locked in 
relative late phase, at least *after* error counter harvest. Please double 
check.

Regards,
Hawking
-Original Message-
From: Zhou1, Tao mailto:tao.zh...@amd.com>>
Sent: Thursday, October 12, 2023 17:01
To: amd-gfx@lists.freedesktop.org; Yang, 
Stanley mailto:stanley.y...@amd.com>>; Zhang, Hawking 
mailto:hawking.zh...@amd.com>>; Li, Candice 
mailto:candice...@amd.com>>; Chai, Thomas 
mailto:yipeng.c...@amd.com>>; Wang, Yang(Kevin) 
mailto:kevinyang.w...@amd.com>>
Cc: Zhou1, Tao mailto:tao.zh...@amd.com>>
Subject: [PATCH 4/5] drm/amdgpu: bypass RAS error reset in some conditions

PMFW is responsible for RAS error reset in some conditions, driver can skip the 
operation.

Signed-off-by: Tao Zhou mailto:tao.zh...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 91ed4fd96ee1..6dddb0423411 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1105,11 +1105,18 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device 
*adev,
enum amdgpu_ras_block block)
 {
struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
+   const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;

if (!block_obj || !block_obj->hw_ops)
return 0;

-   if (!amdgpu_ras_is_supported(adev, block))
+   /* skip ras error reset in gpu reset */
+   if (amdgpu_in_reset(adev) &&
+   mca_funcs && mca_funcs->mca_set_debug_mode)
+   return 0;
+
+   if (!amdgpu_ras_is_supported(adev, block) ||
+   !amdgpu_ras_get_mca_debug_mode(adev))
return 0;

if (block_obj->hw_ops->reset_ras_error_count)
@@ -1122,6 +1129,7 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
enum amdgpu_ras_block block)
 {
struct amdgpu_ras_block_object *block_obj = 
amdgpu_ras_get_ras_block(adev, block, 0);
+   const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;

if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ 
-1129,7 +1137,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device 
*adev,
return 0;
}

-   if (!amdgpu_ras_is_supported(adev, block))
+   /* skip ras error reset in gpu reset */
+   if (amdgpu_in_reset(adev) &&
+   mca_funcs && mca_funcs->mca_set_debug_mode)
+   return 0;
+
+   if (!amdgpu_ras_is_supported(adev, block) ||
+   !amdgpu_ras_get_mca_debug_mode(adev))
return 0;

if (block_obj->hw_ops->reset_ras_error_count)
--
2.35.1


Re: [PATCH 1/2] drm/amdgpu : Add hive ras recovery check

2023-10-16 Thread Lazar, Lijo




On 10/17/2023 9:58 AM, Asad Kamal wrote:

Add hive ras recovery check and propagate fatal
error to aids of all sockets in the hive


May be reword it as 'If one of the devices in the hive detects a fatal 
error, need to send ras recovery reset message to PMFW of all devices in 
the hive. For that add a flag in hive to indicate that it's undergoing 
ras recovery'.


One other comment inline.

Series is-

Reviewed-by: Lijo Lazar 



Signed-off-by: Asad Kamal 
Reviewed-by: Hawking Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  9 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  1 +
  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 10 +-
  3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5fb57419ef77..029871bfe714 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2061,9 +2061,11 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
struct amdgpu_device *remote_adev = NULL;
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle =  NULL;
+   struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
  
+	if (hive)

+   atomic_set(&hive->ras_recovery, 1);
if (!ras->disable_ras_err_cnt_harvest) {
-   struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
  
  		/* Build list of devices to query RAS related errors */

if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -2080,7 +2082,6 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
amdgpu_ras_log_on_err_counter(remote_adev);
}
  
-		amdgpu_put_xgmi_hive(hive);

}
  
  	if (amdgpu_device_should_recover_gpu(ras->adev)) {

@@ -2115,6 +2116,10 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
atomic_set(&ras->in_recovery, 0);
+   if (hive) {
+   atomic_set(&hive->ras_recovery, 0);
+   amdgpu_put_xgmi_hive(hive);
+   }
  }
  
  /* alloc/realloc bps array */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 86fbf56938f4..6cab882e8061 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -44,6 +44,7 @@ struct amdgpu_hive_info {
  
  	struct amdgpu_reset_domain *reset_domain;

uint32_t device_remove_count;
+   atomic_t ras_recovery;
  };
  
  struct amdgpu_pcs_ras_field {

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 8220bdcbd927..29bb2a3a3cb1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2184,13 +2184,21 @@ static int smu_v13_0_6_mode1_reset(struct smu_context 
*smu)
struct amdgpu_ras *ras;
u32 fatal_err, param;
int ret = 0;
+   struct amdgpu_hive_info *hive = NULL;
+   u32 hive_ras_recovery = 0;


Better to keep reverse christmas tree order for the whole set of 
declarations.


Thanks,
Lijo

  
+	hive = amdgpu_get_xgmi_hive(adev);

ras = amdgpu_ras_get_context(adev);
fatal_err = 0;
param = SMU_RESET_MODE_1;
  
+	if (hive) {

+   hive_ras_recovery = atomic_read(&hive->ras_recovery);
+   amdgpu_put_xgmi_hive(hive);
+   }
+
/* fatal error triggered by ras, PMFW supports the flag */
-   if (ras && atomic_read(&ras->in_recovery))
+   if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
fatal_err = 1;
  
  	param |= (fatal_err << 16);


[PATCH 1/2] drm/amdgpu : Add hive ras recovery check

2023-10-16 Thread Asad Kamal
Add hive ras recovery check and propagate fatal
error to aids of all sockets in the hive

Signed-off-by: Asad Kamal 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  |  9 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 10 +-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5fb57419ef77..029871bfe714 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2061,9 +2061,11 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
struct amdgpu_device *remote_adev = NULL;
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle =  NULL;
+   struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
+   if (hive)
+   atomic_set(&hive->ras_recovery, 1);
if (!ras->disable_ras_err_cnt_harvest) {
-   struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
/* Build list of devices to query RAS related errors */
if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -2080,7 +2082,6 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
amdgpu_ras_log_on_err_counter(remote_adev);
}
 
-   amdgpu_put_xgmi_hive(hive);
}
 
if (amdgpu_device_should_recover_gpu(ras->adev)) {
@@ -2115,6 +2116,10 @@ static void amdgpu_ras_do_recovery(struct work_struct 
*work)
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
atomic_set(&ras->in_recovery, 0);
+   if (hive) {
+   atomic_set(&hive->ras_recovery, 0);
+   amdgpu_put_xgmi_hive(hive);
+   }
 }
 
 /* alloc/realloc bps array */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 86fbf56938f4..6cab882e8061 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -44,6 +44,7 @@ struct amdgpu_hive_info {
 
struct amdgpu_reset_domain *reset_domain;
uint32_t device_remove_count;
+   atomic_t ras_recovery;
 };
 
 struct amdgpu_pcs_ras_field {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 8220bdcbd927..29bb2a3a3cb1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2184,13 +2184,21 @@ static int smu_v13_0_6_mode1_reset(struct smu_context 
*smu)
struct amdgpu_ras *ras;
u32 fatal_err, param;
int ret = 0;
+   struct amdgpu_hive_info *hive = NULL;
+   u32 hive_ras_recovery = 0;
 
+   hive = amdgpu_get_xgmi_hive(adev);
ras = amdgpu_ras_get_context(adev);
fatal_err = 0;
param = SMU_RESET_MODE_1;
 
+   if (hive) {
+   hive_ras_recovery = atomic_read(&hive->ras_recovery);
+   amdgpu_put_xgmi_hive(hive);
+   }
+
/* fatal error triggered by ras, PMFW supports the flag */
-   if (ras && atomic_read(&ras->in_recovery))
+   if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
fatal_err = 1;
 
param |= (fatal_err << 16);
-- 
2.42.0



[PATCH 2/2] drm/amdgpu: update retry times for psp BL wait

2023-10-16 Thread Asad Kamal
Increase retry time for PSP BL wait, to compensate
for longer time to set c2pmsg 35 ready bit during
mode1 with RAS

Signed-off-by: Asad Kamal 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index b7bc00d4c696..e6d78e2a9376 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -168,7 +168,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context 
*psp)
 * If there is an error in processing command, bits[7:0] will be set.
 * This is applicable for PSP v13.0.6 and newer.
 */
-   for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+   for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) 
{
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
0x8000, 0x, false);
-- 
2.42.0



[PATCH 2/2] gpu/drm: Eliminate DRM_SCHED_PRIORITY_UNSET

2023-10-16 Thread Luben Tuikov
Eliminate DRM_SCHED_PRIORITY_UNSET, value of -2, whose only user was
amdgpu. Furthermore, eliminate an index bug, in that when amdgpu boots, it
calls drm_sched_entity_init() with DRM_SCHED_PRIORITY_UNSET, which uses it to
index sched->sched_rq[].

Cc: Alex Deucher 
Cc: Christian König 
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++-
 include/drm/gpu_scheduler.h | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 092962b93064fc..aac52d9754e6da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -64,7 +64,8 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
 {
switch (ctx_prio) {
case AMDGPU_CTX_PRIORITY_UNSET:
-   return DRM_SCHED_PRIORITY_UNSET;
+   pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+   return DRM_SCHED_PRIORITY_NORMAL;
 
case AMDGPU_CTX_PRIORITY_VERY_LOW:
return DRM_SCHED_PRIORITY_MIN;
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index f9544d9b670d33..ac65f0626cfc91 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -68,8 +68,7 @@ enum drm_sched_priority {
DRM_SCHED_PRIORITY_HIGH,
DRM_SCHED_PRIORITY_KERNEL,
 
-   DRM_SCHED_PRIORITY_COUNT,
-   DRM_SCHED_PRIORITY_UNSET = -2
+   DRM_SCHED_PRIORITY_COUNT
 };
 
 /* Used to chose between FIFO and RR jobs scheduling */
-- 
2.42.0



[PATCH 1/2] drm/amdgpu: Unset context priority is now invalid

2023-10-16 Thread Luben Tuikov
A context priority value of AMDGPU_CTX_PRIORITY_UNSET is now invalid--instead of
carrying it around and passing it to the Direct Rendering Manager--and it
becomes AMDGPU_CTX_PRIORITY_NORMAL in amdgpu_ctx_ioctl(), the gateway to context
creation.

Cc: Alex Deucher 
Cc: Christian König 
Signed-off-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0dc9c655c4fbdb..092962b93064fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -47,7 +47,6 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] 
= {
 bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
 {
switch (ctx_prio) {
-   case AMDGPU_CTX_PRIORITY_UNSET:
case AMDGPU_CTX_PRIORITY_VERY_LOW:
case AMDGPU_CTX_PRIORITY_LOW:
case AMDGPU_CTX_PRIORITY_NORMAL:
@@ -55,6 +54,7 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
case AMDGPU_CTX_PRIORITY_VERY_HIGH:
return true;
default:
+   case AMDGPU_CTX_PRIORITY_UNSET:
return false;
}
 }

base-commit: dc9b2e683bcba017588b9aaad80f442ad004a48f
-- 
2.42.0



[PATCH v12 9/9] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7

2023-10-16 Thread Ma Jun
From: Evan Quan 

Fulfill the SMU13.0.7 support for Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
Signed-off-by: Ma Jun 
--
v10->v11:
  - downgrade the prompt level on message failure(Lijo)
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 59 +++
 1 file changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 62f2886ab4df..c5736fb3cf6d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -126,6 +126,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(GetPptLimit,PPSMC_MSG_GetPptLimit,  
   0),
MSG_MAP(NotifyPowerSource,  PPSMC_MSG_NotifyPowerSource,
   0),
+   MSG_MAP(EnableUCLKShadow,   PPSMC_MSG_EnableUCLKShadow, 
   0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -207,6 +208,7 @@ static struct cmn2asic_mapping 
smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(ACTIVITY_MONITOR_COEFF),
[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
TAB_MAP(OVERDRIVE),
+   TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] 
= {
@@ -503,6 +505,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu)
   AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, 
MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+  sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+  AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -2179,6 +2184,57 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context 
*smu,
   NULL);
 }
 
+static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu)
+{
+   return smu->smc_fw_version > 0x00524600;
+}
+
+static int smu_v13_0_7_set_wbrf_exclusion_ranges(struct smu_context *smu,
+struct freq_band_range 
*exclusion_ranges)
+{
+   WifiBandEntryTable_t wifi_bands;
+   int valid_entries = 0;
+   int ret, i;
+
+   memset(&wifi_bands, 0, sizeof(wifi_bands));
+   for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) {
+   if (!exclusion_ranges[i].start &&
+   !exclusion_ranges[i].end)
+   break;
+
+   /* PMFW expects the inputs to be in Mhz unit */
+   wifi_bands.WifiBandEntry[valid_entries].LowFreq =
+   DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, 
HZ_IN_MHZ);
+   wifi_bands.WifiBandEntry[valid_entries++].HighFreq =
+   DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_IN_MHZ);
+   }
+   wifi_bands.WifiBandEntryNum = valid_entries;
+
+   /*
+* Per confirm with PMFW team, WifiBandEntryNum = 0 is a valid setting.
+* Considering the scenarios below:
+* - At first the wifi device adds an exclusion range e.g. (2400,2500) 
to
+*   BIOS and our driver gets notified. We will set WifiBandEntryNum = 1
+*   and pass the WifiBandEntry (2400, 2500) to PMFW.
+*
+* - Later the wifi device removes the wifiband list added above and
+*   our driver gets notified again. At this time, driver will set
+*   WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW.
+*   - PMFW may still need to do some uclk shadow update(e.g. switching
+* from shadow clock back to primary clock) on receiving this.
+*/
+
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_WIFIBAND,
+  0,
+  (void *)(&wifi_bands),
+  true);
+   if (ret)
+   dev_warn(smu->adev->dev, "Failed to set wifiband!");
+
+   return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2247,6 +2303,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
.set_mp1_state = smu_v13_0_7_set_mp1_state,
.set_df_cstate = smu_v13_0_7_set_df_cstate,
.gpo_control = smu_v13_0_gpo_control,
+   .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
+   .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
+   .set_wbrf_exclusio

[PATCH v12 8/9] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0

2023-10-16 Thread Ma Jun
From: Evan Quan 

Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
Signed-off-by: Ma Jun 
--
v10->v11:
  - downgrade the prompt level on message failure(Lijo)
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  3 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  3 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|  9 +++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 60 +++
 5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index d396a18fe0f3..6f88c352b53e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -325,6 +325,7 @@ enum smu_table_id
SMU_TABLE_PACE,
SMU_TABLE_ECCINFO,
SMU_TABLE_COMBO_PPTABLE,
+   SMU_TABLE_WIFIBAND,
SMU_TABLE_COUNT,
 };
 
@@ -1501,6 +1502,8 @@ enum smu_baco_seq {
 __dst_size);  \
 })
 
+#define HZ_IN_MHZ  100U
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && 
!defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..5bbb60289a79 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(EnableUCLKShadow),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 355c156d871a..dd70b56aa71e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -299,5 +299,8 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap);
 
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,
+bool enablement);
+
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 9b62b45ebb7f..6a5cb582aa92 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2472,3 +2472,12 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 
return 0;
 }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,
+bool enablement)
+{
+   return smu_cmn_send_smc_msg_with_param(smu,
+  SMU_MSG_EnableUCLKShadow,
+  enablement,
+  NULL);
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 0fb6be11a0cc..08ab19559c7b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -154,6 +154,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(AllowIHHostInterrupt,   PPSMC_MSG_AllowIHHostInterrupt, 
  0),
MSG_MAP(ReenableAcDcInterrupt,  
PPSMC_MSG_ReenableAcDcInterrupt,   0),
+   MSG_MAP(EnableUCLKShadow,   PPSMC_MSG_EnableUCLKShadow, 
   0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -237,6 +238,7 @@ static struct cmn2asic_mapping 
smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(I2C_COMMANDS),
TAB_MAP(ECCINFO),
TAB_MAP(OVERDRIVE),
+   TAB_MAP(WIFIBAND),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] 
= {
@@ -496,6 +498,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+  sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+  AMDGPU_GEM_DOMAIN_VRAM);
 
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), 
GFP_KERNEL);
if (!smu_table->metrics_table)
@@ -2607,6 +2612,58 @@ static ssize_t smu_v13_0_

[PATCH v12 7/9] drm/amd/pm: add flood detection for wbrf events

2023-10-16 Thread Ma Jun
From: Evan Quan 

To protect PMFW from being overloaded.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 31 +++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index d52cd7ed2868..b470f7b7c91d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1319,7 +1319,8 @@ static int smu_wbrf_event_handler(struct notifier_block 
*nb,
 
switch (action) {
case WBRF_CHANGED:
-   smu_wbrf_handle_exclusion_ranges(smu);
+   schedule_delayed_work(&smu->wbrf_delayed_work,
+ 
msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
break;
default:
return NOTIFY_DONE;
@@ -1328,6 +1329,21 @@ static int smu_wbrf_event_handler(struct notifier_block 
*nb,
return NOTIFY_OK;
 }
 
+/**
+ * smu_wbrf_delayed_work_handler - callback on delayed work timer expired
+ *
+ * @work: struct work_struct pointer
+ *
+ * Flood is over and driver will consume the latest exclusion ranges.
+ */
+static void smu_wbrf_delayed_work_handler(struct work_struct *work)
+{
+   struct smu_context *smu =
+   container_of(work, struct smu_context, wbrf_delayed_work.work);
+
+   smu_wbrf_handle_exclusion_ranges(smu);
+}
+
 /**
  * smu_wbrf_support_check - check wbrf support
  *
@@ -1358,12 +1374,14 @@ static void smu_wbrf_support_check(struct smu_context 
*smu)
  */
 static int smu_wbrf_init(struct smu_context *smu)
 {
-   struct amdgpu_device *adev = smu->adev;
int ret;
 
if (!smu->wbrf_supported)
return 0;
 
+   INIT_DELAYED_WORK(&smu->wbrf_delayed_work,
+ smu_wbrf_delayed_work_handler);
+
smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler;
ret = amd_wbrf_register_notifier(&smu->wbrf_notifier);
if (ret)
@@ -1374,11 +1392,10 @@ static int smu_wbrf_init(struct smu_context *smu)
 * before our driver loaded. To make sure our driver
 * is awared of those exclusion ranges.
 */
-   ret = smu_wbrf_handle_exclusion_ranges(smu);
-   if (ret)
-   dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n");
+   schedule_delayed_work(&smu->wbrf_delayed_work,
+ msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE));
 
-   return ret;
+   return 0;
 }
 
 /**
@@ -1394,6 +1411,8 @@ static void smu_wbrf_fini(struct smu_context *smu)
return;
 
amd_wbrf_unregister_notifier(&smu->wbrf_notifier);
+
+   cancel_delayed_work_sync(&smu->wbrf_delayed_work);
 }
 
 static int smu_smc_hw_setup(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 39c1620d68c9..d396a18fe0f3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -480,6 +480,12 @@ struct stb_context {
 
 #define WORKLOAD_POLICY_MAX 7
 
+/*
+ * Configure wbrf event handling pace as there can be only one
+ * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms.
+ */
+#define SMU_WBRF_EVENT_HANDLING_PACE   10
+
 struct smu_context
 {
struct amdgpu_device*adev;
@@ -581,6 +587,7 @@ struct smu_context
/* data structures for wbrf feature support */
boolwbrf_supported;
struct notifier_block   wbrf_notifier;
+   struct delayed_work wbrf_delayed_work;
 };
 
 struct i2c_adapter;
-- 
2.34.1



[PATCH v12 6/9] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature

2023-10-16 Thread Ma Jun
From: Evan Quan 

With WBRF feature supported, as a driver responding to the frequencies,
amdgpu driver is able to do shadow pstate switching to mitigate possible
interference (between its (G-)DDR memory clocks and local radio module
frequency bands used by Wifi 6/6e/7).

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
Signed-off-by: Ma Jun 
--
v1->v2:
  - update the prompt for feature support(Lijo)
v8->v9:
  - update parameter document for smu_wbrf_event_handler(Simon)
v9->v10:
v10->v11:
 - correct the logics for wbrf range sorting(Lijo)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 ++
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 195 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  23 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 5 files changed, 240 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6dc950c1b689..11a19384df56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -247,6 +247,8 @@ extern int amdgpu_sg_display;
 
 extern int amdgpu_user_partt_mode;
 
+extern int amdgpu_wbrf;
+
 #define AMDGPU_VM_MAX_NUM_CTX  4096
 #define AMDGPU_SG_THRESHOLD(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a6..1c574bd3b60d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_use_xgmi_p2p = 1;
 int amdgpu_vcnfw_log;
 int amdgpu_sg_display = -1; /* auto */
 int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_wbrf = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -981,6 +982,22 @@ module_param_named(user_partt_mode, 
amdgpu_user_partt_mode, uint, 0444);
 module_param(enforce_isolation, bool, 0444);
 MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between 
graphics and compute . enforce_isolation = on");
 
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely 
interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local 
radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI 
interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or 
“P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial 
setting or
+ * P-state transition. However, there may be potential performance impact with 
this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if 
supported))
+ */
+MODULE_PARM_DESC(wbrf,
+   "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 
= auto (default))");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 222af2fae745..d52cd7ed2868 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1228,6 +1228,174 @@ static int smu_get_thermal_temperature_range(struct 
smu_context *smu)
return ret;
 }
 
+/**
+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper 
handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+   struct wbrf_ranges_in_out wbrf_exclusion = {0};
+   struct freq_band_range *wifi_bands = wbrf_exclusion.band_list;
+   struct amdgpu_device *adev = smu->adev;
+   uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES;
+   uint64_t start, end;
+   int ret, i, j;
+
+   ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion);
+   if (ret) {
+   dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+   return ret;
+   }
+
+   /*
+* The exclusion ranges array we got might be filled with holes and 
duplicate
+* entries. For example:
+* {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 
6189), (0, 0)...}
+* We need to do some sortups to eliminate those holes and duplicate 
entries.
+* Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 
0)...}
+*/
+   for (i = 0; i < num_of_wbrf_ranges; i++) {
+   start = wifi_bands[i].start;
+   end = wifi_bands[i].end;
+
+   /* get the last valid entry to fill the intermediate hole */
+   if (!

[PATCH v12 5/9] drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature

2023-10-16 Thread Ma Jun
From: Evan Quan 

Add those data structures to support Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
Signed-off-by: Ma Jun 
---
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 14 +-
 .../pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 14 +-
 .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h   |  3 ++-
 .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h   |  3 ++-
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 9dd1ed5b8940..e481407b6584 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -391,6 +391,17 @@ typedef struct {
   EccInfo_t  EccInfo[24];
 } EccInfoTable_t;
 
+typedef struct {
+  uint16_t LowFreq;
+  uint16_t HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+  uint32_t WifiBandEntryNum;
+  WifiOneBand_tWifiBandEntry[11];
+  uint32_t MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 //D3HOT sequences
 typedef enum {
   BACO_SEQUENCE,
@@ -1615,7 +1626,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS9
 #define TABLE_DRIVER_INFO 10
 #define TABLE_ECCINFO 11
-#define TABLE_COUNT   12
+#define TABLE_WIFIBAND12
+#define TABLE_COUNT   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 62b7c0daff68..1530ca002c6c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -392,6 +392,17 @@ typedef struct {
   EccInfo_t  EccInfo[24];
 } EccInfoTable_t;
 
+typedef struct {
+  uint16_t LowFreq;
+  uint16_t HighFreq;
+} WifiOneBand_t;
+
+typedef struct {
+  uint32_t WifiBandEntryNum;
+  WifiOneBand_tWifiBandEntry[11];
+  uint32_t MmHubPadding[8];
+} WifiBandEntryTable_t;
+
 //D3HOT sequences
 typedef enum {
   BACO_SEQUENCE,
@@ -1605,7 +1616,8 @@ typedef struct {
 #define TABLE_I2C_COMMANDS9
 #define TABLE_DRIVER_INFO 10
 #define TABLE_ECCINFO 11
-#define TABLE_COUNT   12
+#define TABLE_WIFIBAND12
+#define TABLE_COUNT   13
 
 //IH Interupt ID
 #define IH_INTERRUPT_ID_TO_DRIVER   0xFE
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index 10cff75b44d5..c98cc32d11bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -138,7 +138,8 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain   0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt   0x4C
-#define PPSMC_Message_Count  0x4D
+#define PPSMC_MSG_EnableUCLKShadow   0x51
+#define PPSMC_Message_Count  0x52
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
index 6aaefca9b595..a6bf9cdd130e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h
@@ -134,6 +134,7 @@
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain   0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt   0x4C
-#define PPSMC_Message_Count  0x4D
+#define PPSMC_MSG_EnableUCLKShadow   0x51
+#define PPSMC_Message_Count  0x52
 
 #endif
-- 
2.34.1



[PATCH v12 4/9] wifi: mac80211: Add support for WBRF features

2023-10-16 Thread Ma Jun
From: Evan Quan 

To support the WBRF mechanism, Wifi adapters utilized in the system must
register the frequencies in use(or unregister those frequencies no longer
used) via the dedicated calls. So that, other drivers responding to the
frequencies can take proper actions to mitigate possible interference.

Co-developed-by: Mario Limonciello 
Signed-off-by: Mario Limonciello 
Co-developed-by: Evan Quan 
Signed-off-by: Evan Quan 
Signed-off-by: Ma Jun 
--
v1->v2:
  - place the new added member(`wbrf_supported`) in
ieee80211_local(Johannes)
  - handle chandefs change scenario properly(Johannes)
  - some minor fixes around code sharing and possible invalid input
checks(Johannes)
v2->v3:
  - drop unnecessary input checks and intermediate APIs(Mario)
  - Separate some mac80211 common code(Mario, Johannes)
v3->v4:
  - some minor fixes around return values(Johannes)
v9->v10:
  - get ranges_in->num_of_ranges set and passed in(Johannes)
v12:
  - use acpi_amd_wbrf_add_remove to replace the acpi_amd_wbrf_add_exclusion
acpi_amd_wbrf_remove_exclusion
---
 include/linux/ieee80211.h  |   1 +
 net/mac80211/Makefile  |   2 +
 net/mac80211/chan.c|   9 
 net/mac80211/ieee80211_i.h |   9 
 net/mac80211/main.c|   2 +
 net/mac80211/wbrf.c| 105 +
 6 files changed, 128 insertions(+)
 create mode 100644 net/mac80211/wbrf.c

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4b998090898e..f995d06da87f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -4335,6 +4335,7 @@ static inline int ieee80211_get_tdls_action(struct 
sk_buff *skb, u32 hdr_size)
 /* convert frequencies */
 #define MHZ_TO_KHZ(freq) ((freq) * 1000)
 #define KHZ_TO_MHZ(freq) ((freq) / 1000)
+#define KHZ_TO_HZ(freq)  ((freq) * 1000)
 #define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000
 #define KHZ_F "%d.%03d"
 
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index b8de44da1fb8..d46c36f55fd3 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
 
 mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
 
+mac80211-y += wbrf.o
+
 ccflags-y += -DDEBUG
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 68952752b599..458469c224ae 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -506,11 +506,16 @@ static void _ieee80211_change_chanctx(struct 
ieee80211_local *local,
 
WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
 
+   ieee80211_remove_wbrf(local, &ctx->conf.def);
+
ctx->conf.def = *chandef;
 
/* check if min chanctx also changed */
changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
  _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+
+   ieee80211_add_wbrf(local, &ctx->conf.def);
+
drv_change_chanctx(local, ctx, changed);
 
if (!local->use_chanctx) {
@@ -668,6 +673,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local 
*local,
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
 
+   ieee80211_add_wbrf(local, &ctx->conf.def);
+
if (!local->use_chanctx)
local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
 
@@ -748,6 +755,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local 
*local,
}
 
ieee80211_recalc_idle(local);
+
+   ieee80211_remove_wbrf(local, &ctx->conf.def);
 }
 
 static void ieee80211_free_chanctx(struct ieee80211_local *local,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 91633a0b723e..719f2c892132 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1600,6 +1600,8 @@ struct ieee80211_local {
 
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
+
+   bool wbrf_supported;
 };
 
 static inline struct ieee80211_sub_if_data *
@@ -2638,4 +2640,11 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct 
ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem 
*eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local);
+void ieee80211_add_wbrf(struct ieee80211_local *local,
+   struct cfg80211_chan_def *chandef);
+void ieee80211_remove_wbrf(struct ieee80211_local *local,
+  struct cfg80211_chan_def *chandef);
+
 #endif /* IEEE80211_I_H */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 24315d7b3126..b20bdaac84db 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1396,6 +1396,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
debugfs_hw_add(local);
rate_control_add_debugfs(local);
 
+   ieee80211_check_wbrf_support(local);
+
rtnl_lock();
wiphy_lock(hw->wiphy);
 
diff -

[PATCH v12 3/9] cfg80211: expose nl80211_chan_width_to_mhz for wide sharing

2023-10-16 Thread Ma Jun
From: Evan Quan 

The newly added WBRF feature needs this interface for channel
width calculation.

Signed-off-by: Ma Jun 
Signed-off-by: Evan Quan 

--
v8->v9:
  - correct typo(Mhz -> MHz) (Johnson)
---
 include/net/cfg80211.h | 8 
 net/wireless/chan.c| 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d6fa7c8767ad..026d91083f37 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -923,6 +923,14 @@ const struct cfg80211_chan_def *
 cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
 
+/**
+ * nl80211_chan_width_to_mhz - get the channel width in MHz
+ * @chan_width: the channel width from &enum nl80211_chan_width
+ * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width
+ * is valid. -1 otherwise.
+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
 /**
  * cfg80211_chandef_valid - check if a channel definition is valid
  * @chandef: the channel definition to check
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0b7e81db383d..227db04eac42 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct 
cfg80211_chan_def *chandef)
return true;
 }
 
-static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
 {
int mhz;
 
@@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum 
nl80211_chan_width chan_width)
}
return mhz;
 }
+EXPORT_SYMBOL(nl80211_chan_width_to_mhz);
 
 static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
 {
-- 
2.34.1



[PATCH v12 2/9] platform/x86/amd: Add support for AMD ACPI based Wifi band RFI mitigation feature

2023-10-16 Thread Ma Jun
Due to electrical and mechanical constraints in certain platform designs
there may be likely interference of relatively high-powered harmonics of
the (G-)DDR memory clocks with local radio module frequency bands used
by Wifi 6/6e/7.

To mitigate this, AMD has introduced a mechanism that devices can use to
notify active use of particular frequencies so that other devices can make
relative internal adjustments as necessary to avoid this resonance.

Co-Developed-by: Evan Quan 
Signed-off-by: Evan Quan 
Signed-off-by: Ma Jun 

--
v11:
 - fix typo(Simon)
v12:
 - Fix the code (Rafael)
 - Move amd_wbrf.c to drivers/platform/x86/amd/wbrf.c
 - Updated Evan's email because he's no longer at AMD. Thanks
for his work in earlier versions.
---
 drivers/platform/x86/amd/Kconfig  |  15 ++
 drivers/platform/x86/amd/Makefile |   1 +
 drivers/platform/x86/amd/wbrf.c   | 402 ++
 include/linux/acpi_amd_wbrf.h | 101 
 4 files changed, 519 insertions(+)
 create mode 100644 drivers/platform/x86/amd/wbrf.c
 create mode 100644 include/linux/acpi_amd_wbrf.h

diff --git a/drivers/platform/x86/amd/Kconfig b/drivers/platform/x86/amd/Kconfig
index d9685aef0887..fa5a978a2d22 100644
--- a/drivers/platform/x86/amd/Kconfig
+++ b/drivers/platform/x86/amd/Kconfig
@@ -32,3 +32,18 @@ config AMD_HSMP
 
  If you choose to compile this driver as a module the module will be
  called amd_hsmp.
+
+config AMD_WBRF
+   bool "AMD Wifi RF Band mitigations (WBRF)"
+   depends on ACPI
+   default n
+   help
+ WBRF(Wifi Band RFI mitigation) mechanism allows Wifi drivers
+ to notify the frequencies they are using so that other hardware
+ can be reconfigured to avoid harmonic conflicts.
+
+ AMD provides an ACPI based mechanism to support WBRF on platform with
+ appropriate underlying support.
+
+ This mechanism will only be activated on platforms that advertise a
+ need for it.
diff --git a/drivers/platform/x86/amd/Makefile 
b/drivers/platform/x86/amd/Makefile
index 65732f0a3913..62b98b048b17 100644
--- a/drivers/platform/x86/amd/Makefile
+++ b/drivers/platform/x86/amd/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_AMD_PMC)   += amd-pmc.o
 amd_hsmp-y := hsmp.o
 obj-$(CONFIG_AMD_HSMP) += amd_hsmp.o
 obj-$(CONFIG_AMD_PMF)  += pmf/
+obj-$(CONFIG_AMD_WBRF) += wbrf.o
diff --git a/drivers/platform/x86/amd/wbrf.c b/drivers/platform/x86/amd/wbrf.c
new file mode 100644
index ..fb414564f576
--- /dev/null
+++ b/drivers/platform/x86/amd/wbrf.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Frequency Band Manage Interface
+ * Copyright (C) 2023 Advanced Micro Devices
+ */
+
+#include 
+#include 
+
+#define ACPI_AMD_WBRF_METHOD   "\\WBRF"
+
+/*
+ * Functions bit vector for WBRF method
+ *
+ * Bit 0: WBRF supported.
+ * Bit 1: Function 1 (Add / Remove frequency) is supported.
+ * Bit 2: Function 2 (Get frequency list) is supported.
+ */
+#define WBRF_ENABLED   0x0
+#define WBRF_RECORD0x1
+#define WBRF_RETRIEVE  0x2
+
+#define WBRF_REVISION  0x1
+
+/*
+ * The data structure used for WBRF_RETRIEVE is not naturally aligned.
+ * And unfortunately the design has been settled down.
+ */
+struct amd_wbrf_ranges_out {
+   u32 num_of_ranges;
+   struct freq_band_range  band_list[MAX_NUM_OF_WBRF_RANGES];
+} __packed;
+
+static const guid_t wifi_acpi_dsm_guid =
+   GUID_INIT(0x7b7656cf, 0xdc3d, 0x4c1c,
+ 0x83, 0xe9, 0x66, 0xe7, 0x21, 0xde, 0x30, 0x70);
+
+/*
+ * Used to notify consumer (amdgpu driver currently) about
+ * the change of the wifi frequencies in use.
+ */
+static BLOCKING_NOTIFIER_HEAD(wbrf_chain_head);
+
+static int wbrf_record(struct acpi_device *adev, uint8_t action,
+  struct wbrf_ranges_in_out *in)
+{
+   union acpi_object argv4;
+   union acpi_object *tmp;
+   union acpi_object *obj;
+   u32 num_of_ranges = 0;
+   u32 num_of_elements;
+   u32 arg_idx = 0;
+   u32 loop_idx;
+   int ret;
+
+   if (!in)
+   return -EINVAL;
+
+   /*
+* The num_of_ranges value in the "in" object supplied by
+* the caller is required to be equal to the number of
+* entries in the band_list array in there.
+*/
+   for (loop_idx = 0; loop_idx < ARRAY_SIZE(in->band_list);
+loop_idx++)
+   if (in->band_list[loop_idx].start &&
+   in->band_list[loop_idx].end)
+   num_of_ranges++;
+
+   if (num_of_ranges != in->num_of_ranges)
+   return -EINVAL;
+
+   /*
+* Every input frequency band comes with two end points(start/end)
+* and each is accounted as an element. Meanwhile the range count
+* and action type are accounted as an element each.
+* So, the total element count = 2 * num_of_ranges + 1 + 1.
+ 

[PATCH v12 1/9] Documentation/driver-api: Add document about WBRF mechanism

2023-10-16 Thread Ma Jun
Add documentation about AMD's Wifi band RFI mitigation (WBRF) mechanism
explaining the theory and how it is used.

Signed-off-by: Ma Jun 
---
 Documentation/driver-api/wbrf.rst | 73 +++
 1 file changed, 73 insertions(+)
 create mode 100644 Documentation/driver-api/wbrf.rst

diff --git a/Documentation/driver-api/wbrf.rst 
b/Documentation/driver-api/wbrf.rst
new file mode 100644
index ..8561840263b3
--- /dev/null
+++ b/Documentation/driver-api/wbrf.rst
@@ -0,0 +1,73 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+=
+WBRF - Wifi Band RFI Mitigations
+=
+Due to electrical and mechanical constraints in certain platform designs
+there may be likely interference of relatively high-powered harmonics of
+the GPU memory clocks with local radio module frequency bands used by
+certain Wifi bands.
+
+To mitigate possible RFI interference producers can advertise the
+frequencies in use and consumers can use this information to avoid using
+these frequencies for sensitive features.
+
+When a platform is known to have this issue with any contained devices,
+the platform designer will advertise the availability of this feature via
+ACPI devices with a device specific method (_DSM).
+* Producers with this _DSM will be able to advertise the frequencies in use.
+* Consumers with this _DSM will be able to register for notifications of
+frequencies in use.
+
+Some general terms
+==
+Producer: such component who can produce high-powered radio frequency
+Consumer: such component who can adjust its in-use frequency in
+   response to the radio frequencies of other components to
+   mitigate the possible RFI.
+
+To make the mechanism function, those producers should notify active use
+of their particular frequencies so that other consumers can make relative
+internal adjustments as necessary to avoid this resonance.
+
+ACPI interface
+==
+Although initially used for wifi + dGPU use cases, the ACPI interface
+can be scaled to any type of device that a platform designer discovers
+can cause interference.
+
+The GUID used for the _DSM is 7B7656CF-DC3D-4C1C-83E9-66E721DE3070.
+
+3 functions are available in this _DSM:
+
+* 0: discover # of functions available
+* 1: record RF bands in use
+* 2: retrieve RF bands in use
+
+Driver programming interface
+
+.. kernel-doc:: drivers/platform/x86/amd/wbrf.c
+
+Sample Usage
+=
+The expected flow for the producers:
+1) During probe, call `acpi_amd_wbrf_supported_producer` to check if WBRF
+can be enabled for the device.
+2) On using some frequency band, call `acpi_amd_wbrf_add_remove` with 'add'
+param to get other consumers properly notified.
+3) Or on stopping using some frequency band, call
+`acpi_amd_wbrf_add_remove` with 'remove' param to get other consumers notified.
+
+The expected flow for the consumers:
+1) During probe, call `acpi_amd_wbrf_supported_consumer` to check if WBRF
+can be enabled for the device.
+2) Call `amd_wbrf_register_notifier` to register for notification
+of frequency band change(add or remove) from other producers.
+3) Call the `amd_wbrf_retrieve_freq_band` intentionally to retrieve
+current active frequency bands considering some producers may broadcast
+such information before the consumer is up.
+4) On receiving a notification for frequency band change, run
+`amd_wbrf_retrieve_freq_band` again to retrieve the latest
+active frequency bands.
+5) During driver cleanup, call `amd_wbrf_unregister_notifier` to
+unregister the notifier.
-- 
2.34.1



[PATCH v12 0/9] Enable Wifi RFI interference mitigation feature support

2023-10-16 Thread Ma Jun
Due to electrical and mechanical constraints in certain platform designs there
may be likely interference of relatively high-powered harmonics of the (G-)DDR
memory clocks with local radio module frequency bands used by Wifi 6/6e/7. To
mitigate possible RFI interference we introduced WBRF (Wifi Band RFI mitigation 
Feature).
Producers can advertise the frequencies in use and consumers can use this 
information
to avoid using these frequencies for sensitive features.

The whole patch set is based on Linux 6.5.0. With some brief introductions
as below:
Patch1:  Document about WBRF
Patch2:  Core functionality setup for WBRF feature support
Patch3 - 4:  Bring WBRF support to wifi subsystem.
Patch5 - 9:  Bring WBRF support to AMD graphics driver.

Evan Quan (7):
  cfg80211: expose nl80211_chan_width_to_mhz for wide sharing
  wifi: mac80211: Add support for WBRF features
  drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature
  drm/amd/pm: setup the framework to support Wifi RFI mitigation feature
  drm/amd/pm: add flood detection for wbrf events
  drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0
  drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7

Ma Jun (2):
  Documentation/driver-api: Add document about WBRF mechanism
  platform/x86/amd: Add support for AMD ACPI based Wifi band RFI
mitigation feature

 Documentation/driver-api/wbrf.rst |  71 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 214 +
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  33 ++
 .../inc/pmfw_if/smu13_driver_if_v13_0_0.h |  14 +-
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h |  14 +-
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h  |   3 +-
 .../pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h  |   3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |   3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |   3 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|   9 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  60 +++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  59 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 drivers/platform/x86/amd/Kconfig  |  15 +
 drivers/platform/x86/amd/Makefile |   1 +
 drivers/platform/x86/amd/wbrf.c   | 422 ++
 include/linux/acpi_amd_wbrf.h | 101 +
 include/linux/ieee80211.h |   1 +
 include/net/cfg80211.h|   8 +
 net/mac80211/Makefile |   2 +
 net/mac80211/chan.c   |   9 +
 net/mac80211/ieee80211_i.h|   9 +
 net/mac80211/main.c   |   2 +
 net/mac80211/wbrf.c   | 105 +
 net/wireless/chan.c   |   3 +-
 27 files changed, 1180 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/driver-api/wbrf.rst
 create mode 100644 drivers/platform/x86/amd/wbrf.c
 create mode 100644 include/linux/acpi_amd_wbrf.h
 create mode 100644 net/mac80211/wbrf.c

-- 
2.34.1



[linux-next:master] BUILD REGRESSION 4d0515b235dec789578d135a5db586b25c5870cb

2023-10-16 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 4d0515b235dec789578d135a5db586b25c5870cb  Add linux-next specific 
files for 20231016

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202309212121.cul1ptra-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202309212339.hxhbu2f1-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310051547.40nm4sif-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310160401.ricvn63p-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310161945.vdy8eswa-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310170132.irophgla-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310170340.tkkfdzyn-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202310170627.2kvf6zhy-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

(.text+0x11c): undefined reference to `devm_hwrng_register'
drivers/crypto/qcom-rng.c:213:(.text+0x16c): undefined reference to 
`devm_hwrng_register'
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c:274: warning: Function parameter or 
member 'gart_placement' not described in 'amdgpu_gmc_gart_location'
drivers/net/ethernet/apm/xgene/xgene_enet_main.c:2004:34: warning: unused 
variable 'xgene_enet_of_match' [-Wunused-const-variable]
idpf_txrx.c:(.text+0x2dbc): undefined reference to `tcp_gro_complete'
include/linux/bitmap.h:527:25: error: 'EBUSY' undeclared (first use in this 
function)
include/linux/bitmap.h:554:17: error: 'ENOMEM' undeclared (first use in this 
function)
kernel/bpf/helpers.c:1909:19: warning: no previous declaration for 
'bpf_percpu_obj_new_impl' [-Wmissing-declarations]
kernel/bpf/helpers.c:1945:18: warning: no previous declaration for 
'bpf_percpu_obj_drop_impl' [-Wmissing-declarations]
kernel/bpf/helpers.c:2480:18: warning: no previous declaration for 'bpf_throw' 
[-Wmissing-declarations]
qcom-rng.c:(.text+0x224): undefined reference to `devm_hwrng_register'

Unverified Error/Warning (likely false positive, please contact us if 
interested):

Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml:

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arc-allmodconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arc-allyesconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm-allmodconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm-allyesconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm-buildonly-randconfig-r006-20220608
|   `-- 
drivers-crypto-qcom-rng.c:(.text):undefined-reference-to-devm_hwrng_register
|-- arm-randconfig-s032-20220424
|   |-- 
include-linux-bitmap.h:error:EBUSY-undeclared-(first-use-in-this-function)
|   `-- 
include-linux-bitmap.h:error:ENOMEM-undeclared-(first-use-in-this-function)
|-- arm64-allmodconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm64-allyesconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm64-randconfig-001-20231016
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|   `-- idpf_txrx.c:(.text):undefined-reference-to-tcp_gro_complete
|-- arm64-randconfig-002-20231016
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- arm64-randconfig-r015-20220507
|   `-- qcom-rng.c:(.text):undefined-reference-to-devm_hwrng_register
|-- csky-allmodconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- csky-allyesconfig
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- i386-randconfig-005-20231016
|   `-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|-- i386-randconfig-006-20231016
|   |-- 
drivers-gpu-drm-amd-amdgpu-amdgpu_gmc.c:warning:Function-parameter-or-member-gart_placement-not-described-in-amdgpu_gmc_gart_location
|   |-- 
kernel-bpf-helpers.c:warning:no

Re: [PATCH v4 4/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P4

2023-10-16 Thread Leo Liu

The set looks good to me. The series is:

Reviewed-by: Leo Liu 

On 2023-10-16 12:54, Bokun Zhang wrote:

- In VCN 4 SRIOV code path, add code to enable RB decouple feature

Signed-off-by: Bokun Zhang 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 71 +--
  1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 88e17f5e20b2..bf07aa200030 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -176,9 +176,6 @@ static int vcn_v4_0_sw_init(void *handle)

AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
}
  
-		if (amdgpu_sriov_vf(adev))

-   fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
-
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
@@ -1209,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
return 0;
  }
  
+static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc)

+{
+   struct amdgpu_vcn_rb_metadata *rb_metadata = NULL;
+   uint8_t *rb_ptr = (uint8_t *)ring_enc->ring;
+
+   rb_ptr += ring_enc->ring_size;
+   rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr;
+
+   memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata));
+   rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata);
+   rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+   rb_metadata->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+   rb_metadata->version = 1;
+   rb_metadata->ring_id = vcn_inst & 0xFF;
+
+   return 0;
+}
+
  static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
  {
int i;
@@ -1334,11 +1349,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device 
*adev)
rb_enc_addr = ring_enc->gpu_addr;
  
  		rb_setup->is_rb_enabled_flags |= RB_ENABLED;

-   rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
-   rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
-   rb_setup->rb_size = ring_enc->ring_size / 4;
fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
  
+		if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {

+   vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
+
+   memset((void *)&rb_setup->rb_info, 0, sizeof(struct 
amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
+   if (!(adev->vcn.harvest_config & (1 << 0))) {
+   rb_setup->rb_info[0].rb_addr_lo = 
lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[0].rb_addr_hi = 
upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[0].rb_size = 
adev->vcn.inst[0].ring_enc[0].ring_size / 4;
+   }
+   if (!(adev->vcn.harvest_config & (1 << 1))) {
+   rb_setup->rb_info[2].rb_addr_lo = 
lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[2].rb_addr_hi = 
upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[2].rb_size = 
adev->vcn.inst[1].ring_enc[0].ring_size / 4;
+   }
+   fw_shared->decouple.is_enabled = 1;
+   fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+   } else {
+   rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+   rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+   rb_setup->rb_size = ring_enc->ring_size / 4;
+   }
+
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
@@ -1810,6 +1844,7 @@ static struct amdgpu_ring_funcs 
vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+   .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata),
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
@@ -2023,16 +2058,20 @@ static int vcn_v4_0_process_interrupt(struct 
amdgpu_device *adev, struct amdgpu_
  {
uint32_t ip_instance;
  
-	switch (entry->client_id) {

-   case SOC15_IH_CLIENTID_VCN:
-   ip_instance = 0;
-   break;
-   case SOC15_IH_CLIENTID_VCN1:
-   ip_instance = 1;
-   break;
-   default:
-   DRM_ERROR("Unhandled client id: %d\n", entr

Re: [PATCH V2 2/2] drm/amdgpu: Permit PCIe transfer over links with XGMI

2023-10-16 Thread Felix Kuehling



On 2023-10-16 10:49, David Francis wrote:

When the CPU is XGMI connected, the PCIe links should
not be enumerated for topology purposes. However, PCIe
transfer should still be a valid option for remote
doorbells and MMIO mappings.

Move the XGMI connection check out of the shared helper
function amdgpu_device_is_peer_accessible and into the
topology path.

Signed-off-by: David Francis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 6 --
  2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..b47cb7f8cfbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5753,9 +5753,7 @@ bool amdgpu_device_is_peer_accessible(struct 
amdgpu_device *adev,
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
-   bool p2p_access =
-   !adev->gmc.xgmi.connected_to_cpu &&
-   !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+   bool p2p_access = !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) 
< 0);
  
  	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&

adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4e530791507e..cb64c19482f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1514,11 +1514,13 @@ static int kfd_dev_create_p2p_links(void)
goto next;
  
  		/* check if node(s) is/are peer accessible in one direction or bi-direction */

-   ret = kfd_add_peer_prop(new_dev, dev, i, k);
+   if (!new_dev->gpu->adev->gmc.xgmi.connected_to_cpu)


Yikes. I was thinking this should be something like

if (... xgmi.connected_to_cpu)
goto next;

I didn't consider that the check needs to be separate for each GPU. I 
mean, it's not exactly the same thing, but would it make sense to have a 
check like this?


if (new_dev->gpu->adev->gmc.xgmi.connected_to_cpu &&
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
goto next;

I don't see why this should depend on the direction of the link. We 
don't want to advertise PCI P2P links between pairs of GPUs that are 
both connected to the CPU via XGMI. We don't currently support mixed 
systems with both XGMI and PCIe connected GPUs. But if that ever 
existed, I think we would want to allow P2P links between those, 
regardless of the direction.


Regards,
  Felix



+   ret = kfd_add_peer_prop(new_dev, dev, i, k);
if (ret < 0)
goto out;
  
-		ret = kfd_add_peer_prop(dev, new_dev, k, i);

+   if (!dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+   ret = kfd_add_peer_prop(dev, new_dev, k, i);
if (ret < 0)
goto out;
  next:


Re: [PATCH V2 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

2023-10-16 Thread Felix Kuehling



On 2023-10-16 10:49, David Francis wrote:

dmaunmap can call ttm_bo_validate, which expects the
ttm dma_resv to be held.

Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.

Because the dmaunmap step can now fail, the unmap ioctl UAPI
needs two new arguments. n_dmaunmap_success tracks the number
of devices that have completed dmaunmap. If a device fails
to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
the number of bos on that device that were successfully
dmaunmapped.

This failure can also cause the sync_memory step of the ioctl
to be repeated; it is idempotent, so this should not cause any issues.

Signed-off-by: David Francis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 +++
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 14 +--
  include/uapi/linux/kfd_ioctl.h|  2 ++
  4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3ad8dc523b42..781642871900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -302,7 +302,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
amdgpu_device *adev,
  struct kgd_mem *mem, void *drm_priv);
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void 
*drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos);
  int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..cbd6032f3d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2094,21 +2094,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
return ret;
  }
  
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)

+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos)
  {
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
+   int ret;
+   int i = 0;
  
  	vm = drm_priv_to_vm(drm_priv);
  
  	mutex_lock(&mem->lock);
  
  	list_for_each_entry(entry, &mem->attachments, list) {

-   if (entry->bo_va->base.vm == vm)
-   kfd_mem_dmaunmap_attachment(mem, entry);
-   }
+   if (i >= *num_bos) {
+   ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
+   if (ret) {
+   *num_bos = i;
+   goto out;
+   }
+
+   if (entry->bo_va->base.vm == vm)
+   kfd_mem_dmaunmap_attachment(mem, entry);
  
+			amdgpu_bo_unreserve(entry->bo_va->base.bo);

+   }
+   i++;
+   }
+   *num_bos = 0;
+out:
mutex_unlock(&mem->lock);
+   return ret;
  }
  
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..a944e255de4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1379,6 +1379,10 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
pr_debug("n_success exceeds n_devices\n");
return -EINVAL;
}
+   if (args->n_dmaunmap_success > args->n_devices) {
+   pr_debug("n_dmaunmap_success exceeds n_devices\n");
+   return -EINVAL;
+   }
  
  	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),

GFP_KERNEL);
@@ -1434,7 +1438,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
}
  
  	/* Flush TLBs after waiting for the page table updates to complete */

-   for (i = 0; i < args->n_devices; i++) {
+   for (i = args->n_dmaunmap_success; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
@@ -1442,7 +1446,12 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
  
  		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */

-   amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+   err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv, 
&args->n_dmaunmap_bos);
+   if (err) {
+ 

Re: [PATCH] drm/amdgpu: move task_info to amdgpu_fpriv

2023-10-16 Thread Felix Kuehling

On 2023-10-16 13:08, Shashank Sharma wrote:

This patch does the following:
- moves vm->task_info struct to fpriv->task_info.
- makes task_info allocation dynamic.
- adds reference counting support for task_info structure.
- adds some new helper functions to find and put task_info.
- adds respective supporting changes for existing get_task_info consumers.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 28 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 87 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  9 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  5 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 19 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 18 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 17 ++--
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 19 +++--
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 19 ++---
  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c| 20 ++---
  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 14 ++--
  14 files changed, 186 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc2d53081e80..a90780d38725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -469,6 +469,8 @@ struct amdgpu_fpriv {
struct mutexbo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_task_info *task_info;
+
/** GPU partition selection */
uint32_txcp_id;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a4faea4fa0b5..6e9dcd13ee34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1763,9 +1763,11 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file 
*m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+   struct amdgpu_task_info *task_info = fpriv->task_info;
  
  		seq_printf(m, "pid:%d\tProcess:%s --\n",

-   vm->task_info.pid, vm->task_info.process_name);
+   task_info ? task_info->pid : 0,
+   task_info ? task_info->process_name : "");
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f001a50b34a..b372a87b9b77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4872,6 +4872,27 @@ static void amdgpu_reset_capture_coredumpm(struct 
amdgpu_device *adev)
dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
  amdgpu_devcoredump_read, amdgpu_devcoredump_free);
  }
+
+static void
+amdgpu_reset_dev_coredump(struct amdgpu_device *adev, struct 
amdgpu_reset_context *reset_context)
+{
+   struct amdgpu_task_info *ti;
+   struct amdgpu_vm *vm;
+
+   if (!reset_context->job || !reset_context->job->vm)
+   return;
+
+   vm = reset_context->job->vm;
+
+   /* Get reset task info and save a copy of data to be consumed later */
+   ti = amdgpu_vm_get_task_info(adev, vm->pasid);
+   if (ti) {
+   adev->reset_task_info = *ti;
+   amdgpu_reset_capture_coredumpm(adev);
+   }
+
+   amdgpu_vm_put_task_info(adev, vm->pasid);
+}
  #endif
  
  int amdgpu_do_asic_reset(struct list_head *device_list_handle,

@@ -4976,12 +4997,7 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
  #ifdef CONFIG_DEV_COREDUMP
tmp_adev->reset_vram_lost = vram_lost;
-   memset(&tmp_adev->reset_task_info, 0,
-   
sizeof(tmp_adev->reset_task_info));
-   if (reset_context->job && 
reset_context->job->vm)
-   tmp_adev->reset_task_info =
-   
reset_context->job->vm->task_info;
-   amdgpu_reset_capture_coredumpm(tmp_adev);
+   amdgpu_reset_dev_coredump(tmp_adev, 
reset_context);
  #endif
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU 
reset!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drive

Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread Simon Ser
On Monday, October 16th, 2023 at 17:10, Ville Syrjälä 
 wrote:

> On Mon, Oct 16, 2023 at 05:52:22PM +0300, Pekka Paalanen wrote:
> 
> > On Mon, 16 Oct 2023 15:42:16 +0200
> > André Almeida andrealm...@igalia.com wrote:
> > 
> > > Hi Pekka,
> > > 
> > > On 10/16/23 14:18, Pekka Paalanen wrote:
> > > 
> > > > On Mon, 16 Oct 2023 12:52:32 +0200
> > > > André Almeida andrealm...@igalia.com wrote:
> > > > 
> > > > > Hi Michel,
> > > > > 
> > > > > On 8/17/23 12:37, Michel Dänzer wrote:
> > > > > 
> > > > > > On 8/15/23 20:57, André Almeida wrote:
> > > > > > 
> > > > > > > From: Pekka Paalanen pekka.paala...@collabora.com
> > > > > > > 
> > > > > > > Specify how the atomic state is maintained between userspace and
> > > > > > > kernel, plus the special case for async flips.
> > > > > > > 
> > > > > > > Signed-off-by: Pekka Paalanen pekka.paala...@collabora.com
> > > > > > > Signed-off-by: André Almeida andrealm...@igalia.com
> > > > > > > [...]
> > > > > > 
> > > > > > > +An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is 
> > > > > > > allowed to
> > > > > > > +effectively change only the FB_ID property on any planes. 
> > > > > > > No-operation changes
> > > > > > > +are ignored as always. [...]
> > > > > > > During the hackfest in Brno, it was mentioned that a commit which 
> > > > > > > re-sets the same FB_ID could actually have an effect with VRR: It 
> > > > > > > could trigger scanout of the next frame before vertical blank has 
> > > > > > > reached its maximum duration. Some kind of mechanism is required 
> > > > > > > for this in order to allow user space to perform low frame rate 
> > > > > > > compensation.
> > > > > 
> > > > > Xaver tested this hypothesis by flipping the same fb on a VRR
> > > > > monitor
> > > > > and it worked as expected, so this shouldn't be a concern.
> > > > > Right, so it must have some effect. It cannot be simply ignored like 
> > > > > in
> > > > > the proposed doc wording. Do we special-case re-setting the same FB_ID
> > > > > as "not a no-op" or "not ignored" or some other way?
> > > > > There's an effect on the refresh rate: the image won't change, but it
> > > > > will report that a flip has happened asynchronously, so the reported
> > > > > framerate will be increased. Maybe an additional wording could be
> > > > > like:
> > > 
> > > Flipping to the same FB_ID will result in an immediate flip as if it were
> > > changing to a different one, with no effect on the image but affecting
> > > the reported frame rate.
> > 
> > Re-setting FB_ID to its current value is a special case regardless of
> > PAGE_FLIP_ASYNC, is it not?
> 
> No. The rule has so far been that all side effects are observed
> even if you flip to the same fb. And that is one of my annoyances
> with this proposal. The rules will now be different for async flips
> vs. everything else.

Well with the patches the async page-flip case is exactly the same as
the non-async page-flip case. In both cases, if a FB_ID is included in
an atomic commit then the side effects are triggered even if the property
value didn't change. The rules are the same for everything.


Re: [PATCH v4 1/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P1

2023-10-16 Thread Zhang, Bokun
[AMD Official Use Only - General]

Hey Chris,
Yes, we use bitfields as flags to control certain features.

From: Christian König 
Sent: Monday, October 16, 2023 1:58 PM
To: Zhang, Bokun ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH v4 1/4] drm/amd/amdgpu/vcn: Add RB decouple feature under 
SRIOV - P1

Am 16.10.23 um 18:52 schrieb Bokun Zhang:
> - Update SRIOV header with RB decouple flag
>
> Signed-off-by: Bokun Zhang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index 104a5ad8397d..51a14f6d93bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags {
>uint32_t host_load_ucodes  : 1;
>uint32_t host_flr_vramlost : 1;
>uint32_t mm_bw_management  : 1;
> - uint32_t pp_one_vf_mode: 1;
> + uint32_t pp_one_vf_mode: 1;
>uint32_t reg_indirect_acc  : 1;
>uint32_t av1_support   : 1;
> - uint32_t reserved  : 25;
> + uint32_t vcn_rb_decouple   : 1;
> + uint32_t reserved  : 24;

Are you guys using bitfields for SRIOV guest<->host communication?

Christian.

>} flags;
>uint32_t all;
>   };



[PATCH] drm/amdgpu: update to the latest GC 11.5 headers

2023-10-16 Thread Alex Deucher
Add some additional bitfields.

Signed-off-by: Alex Deucher 
---
 .../include/asic_reg/gc/gc_11_5_0_sh_mask.h   | 48 +++
 1 file changed, 48 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h
index 3404bf10428d..f10e5d1f592b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h
@@ -19227,6 +19227,9 @@
 #define CB_COLOR0_FDCC_CONTROL__FDCC_ENABLE__SHIFT 
   0x16
 #define CB_COLOR0_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT
   0x17
 #define CB_COLOR0_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT   
   0x18
+#define CB_COLOR0_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT  
   0x19
+#define CB_COLOR0_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT   
   0x1a
+#define CB_COLOR0_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT  
   0x1b
 #define CB_COLOR0_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK   
   0x0001L
 #define CB_COLOR0_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 
   0x0002L
 #define CB_COLOR0_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK   
   0x000CL
@@ -19241,6 +19244,9 @@
 #define CB_COLOR0_FDCC_CONTROL__FDCC_ENABLE_MASK   
   0x0040L
 #define CB_COLOR0_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK  
   0x0080L
 #define CB_COLOR0_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 
   0x0100L
+#define CB_COLOR0_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK
   0x0200L
+#define CB_COLOR0_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 
   0x0400L
+#define CB_COLOR0_FDCC_CONTROL__MAX_COMP_FRAGS_MASK
   0x3800L
 //CB_COLOR0_DCC_BASE
 #define CB_COLOR0_DCC_BASE__BASE_256B__SHIFT   
   0x0
 #define CB_COLOR0_DCC_BASE__BASE_256B_MASK 
   0xL
@@ -19301,6 +19307,9 @@
 #define CB_COLOR1_FDCC_CONTROL__FDCC_ENABLE__SHIFT 
   0x16
 #define CB_COLOR1_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT
   0x17
 #define CB_COLOR1_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE__SHIFT   
   0x18
+#define CB_COLOR1_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS__SHIFT  
   0x19
+#define CB_COLOR1_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE__SHIFT   
   0x1a
+#define CB_COLOR1_FDCC_CONTROL__MAX_COMP_FRAGS__SHIFT  
   0x1b
 #define CB_COLOR1_FDCC_CONTROL__SAMPLE_MASK_TRACKER_DISABLE_MASK   
   0x0001L
 #define CB_COLOR1_FDCC_CONTROL__SAMPLE_MASK_TRACKER_FEA_FORCE_MASK 
   0x0002L
 #define CB_COLOR1_FDCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE_MASK   
   0x000CL
@@ -19315,6 +19324,9 @@
 #define CB_COLOR1_FDCC_CONTROL__FDCC_ENABLE_MASK   
   0x0040L
 #define CB_COLOR1_FDCC_CONTROL__DCC_COMPRESS_DISABLE_MASK  
   0x0080L
 #define CB_COLOR1_FDCC_CONTROL__FRAGMENT_COMPRESS_DISABLE_MASK 
   0x0100L
+#define CB_COLOR1_FDCC_CONTROL__DISABLE_OVERRIDE_INCONSISTENT_KEYS_MASK
   0x0200L
+#define CB_COLOR1_FDCC_CONTROL__ENABLE_MAX_COMP_FRAG_OVERRIDE_MASK 
   0x0400L
+#define CB_COLOR1_FDCC_CONTROL__MAX_COMP_FRAGS_MASK
   0x3800L
 //CB_COLOR1_DCC_BASE
 #define CB_COLOR1_DCC_BASE__BASE_256B__SHIFT   
   0x0
 #define CB_COLOR1_DCC_BASE__BASE_256B_MASK 
   0xL
@@ -19375,6 +19387,9 @@
 #define CB_COLOR2_FDCC_CONTROL__FDCC_ENABLE__SHIFT 
   0x16
 #define CB_COLOR2_FDCC_CONTROL__DCC_COMPRESS_DISABLE__SHIFT
   0x17
 #def

Re: [PATCH v4 1/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P1

2023-10-16 Thread Christian König

Am 16.10.23 um 18:52 schrieb Bokun Zhang:

- Update SRIOV header with RB decouple flag

Signed-off-by: Bokun Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 104a5ad8397d..51a14f6d93bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags {
uint32_t host_load_ucodes  : 1;
uint32_t host_flr_vramlost : 1;
uint32_t mm_bw_management  : 1;
-   uint32_t pp_one_vf_mode: 1;
+   uint32_t pp_one_vf_mode: 1;
uint32_t reg_indirect_acc  : 1;
uint32_t av1_support   : 1;
-   uint32_t reserved  : 25;
+   uint32_t vcn_rb_decouple   : 1;
+   uint32_t reserved  : 24;


Are you guys using bitfields for SRIOV guest<->host communication?

Christian.


} flags;
uint32_t all;
  };




Re: [PATCH 2/2] drm/amd/display: Fix stack size issue on DML2

2023-10-16 Thread Alex Deucher
On Mon, Oct 16, 2023 at 10:27 AM Rodrigo Siqueira
 wrote:
>
> This commit is the last part of the fix that reduces the stack size in
> the DML2 code.
>
> Cc: Stephen Rothwell 
> Cc: Alex Deucher 
> Cc: Roman Li 
> Cc: Chaitanya Dhere 
> Fixes: a2815ada8616 ("drm/amd/display: Introduce DML2")
> Signed-off-by: Rodrigo Siqueira 

Series is:
Acked-by: Alex Deucher 

> ---
>  .../amd/display/dc/dml2/display_mode_core.c   | 99 ++-
>  1 file changed, 54 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c 
> b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
> index 06358b7fe38b..851db026f251 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
> @@ -6208,6 +6208,58 @@ static dml_uint_t CalculateMaxVStartup(
> return max_vstartup_lines;
>  }
>
> +static void set_calculate_prefetch_schedule_params(struct 
> display_mode_lib_st *mode_lib,
> +  struct 
> CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
> +  dml_uint_t j,
> +  dml_uint_t k)
> +{
> +   CalculatePrefetchSchedule_params->DSCDelay = 
> mode_lib->ms.DSCDelayPerState[k];
> +   
> CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = 
> mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
> +   
> CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = 
> mode_lib->ms.ip.dppclk_delay_subtotal + 
> mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
> +   
> CalculatePrefetchSchedule_params->DPPCLKDelaySCL = 
> mode_lib->ms.ip.dppclk_delay_scl;
> +   
> CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = 
> mode_lib->ms.ip.dppclk_delay_scl_lb_only;
> +   
> CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = 
> mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
> +   
> CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = 
> mode_lib->ms.ip.dispclk_delay_subtotal;
> +   
> CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = 
> (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / 
> mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
> +   
> CalculatePrefetchSchedule_params->OutputFormat = 
> mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
> +   
> CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = 
> mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
> +   
> CalculatePrefetchSchedule_params->GPUVMPageTableLevels = 
> mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
> +   CalculatePrefetchSchedule_params->GPUVMEnable 
> = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
> +   
> CalculatePrefetchSchedule_params->HostVMEnable = 
> mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
> +   
> CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = 
> mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
> +   
> CalculatePrefetchSchedule_params->HostVMMinPageSize = 
> mode_lib->ms.soc.hostvm_min_page_size_kbytes;
> +   
> CalculatePrefetchSchedule_params->DynamicMetadataEnable = 
> mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
> +   
> CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = 
> mode_lib->ms.ip.dynamic_metadata_vm_enabled;
> +   
> CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = 
> mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
> +   
> CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = 
> mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
> +   
> CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
> +   
> CalculatePrefetchSchedule_params->UrgentExtraLatency = 
> mode_lib->ms.ExtraLatency;
> +   CalculatePrefetchSchedule_params->TCalc = 
> mode_lib->ms.TimeCalc;
> +   
> CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = 
> mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
> +   CalculatePrefetchSchedule_params->MetaRowByte 
> = mode_lib->ms.MetaRowBytes[j][k];
> +   
> CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = 
> mode_lib->ms.DPTEBytesPerRow[j][k];
> +

[PATCH] drm/amdgpu: move task_info to amdgpu_fpriv

2023-10-16 Thread Shashank Sharma
This patch does the following:
- moves vm->task_info struct to fpriv->task_info.
- makes task_info allocation dynamic.
- adds reference counting support for task_info structure.
- adds some new helper functions to find and put task_info.
- adds respective supporting changes for existing get_task_info consumers.

Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 28 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 87 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  9 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 19 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 18 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 17 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 19 +++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 19 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c| 20 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 14 ++--
 14 files changed, 186 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc2d53081e80..a90780d38725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -469,6 +469,8 @@ struct amdgpu_fpriv {
struct mutexbo_list_lock;
struct idr  bo_list_handles;
struct amdgpu_ctx_mgr   ctx_mgr;
+   struct amdgpu_task_info *task_info;
+
/** GPU partition selection */
uint32_txcp_id;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a4faea4fa0b5..6e9dcd13ee34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1763,9 +1763,11 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file 
*m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+   struct amdgpu_task_info *task_info = fpriv->task_info;
 
seq_printf(m, "pid:%d\tProcess:%s --\n",
-   vm->task_info.pid, vm->task_info.process_name);
+   task_info ? task_info->pid : 0,
+   task_info ? task_info->process_name : "");
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f001a50b34a..b372a87b9b77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4872,6 +4872,27 @@ static void amdgpu_reset_capture_coredumpm(struct 
amdgpu_device *adev)
dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
  amdgpu_devcoredump_read, amdgpu_devcoredump_free);
 }
+
+static void
+amdgpu_reset_dev_coredump(struct amdgpu_device *adev, struct 
amdgpu_reset_context *reset_context)
+{
+   struct amdgpu_task_info *ti;
+   struct amdgpu_vm *vm;
+
+   if (!reset_context->job || !reset_context->job->vm)
+   return;
+
+   vm = reset_context->job->vm;
+
+   /* Get reset task info and save a copy of data to be consumed later */
+   ti = amdgpu_vm_get_task_info(adev, vm->pasid);
+   if (ti) {
+   adev->reset_task_info = *ti;
+   amdgpu_reset_capture_coredumpm(adev);
+   }
+
+   amdgpu_vm_put_task_info(adev, vm->pasid);
+}
 #endif
 
 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
@@ -4976,12 +4997,7 @@ int amdgpu_do_asic_reset(struct list_head 
*device_list_handle,
vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
 #ifdef CONFIG_DEV_COREDUMP
tmp_adev->reset_vram_lost = vram_lost;
-   memset(&tmp_adev->reset_task_info, 0,
-   
sizeof(tmp_adev->reset_task_info));
-   if (reset_context->job && 
reset_context->job->vm)
-   tmp_adev->reset_task_info =
-   
reset_context->job->vm->task_info;
-   amdgpu_reset_capture_coredumpm(tmp_adev);
+   amdgpu_reset_dev_coredump(tmp_adev, 
reset_context);
 #endif
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU 
reset!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 78476bc75b4e..99cf

[PATCH v4 4/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P4

2023-10-16 Thread Bokun Zhang
- In VCN 4 SRIOV code path, add code to enable RB decouple feature

Signed-off-by: Bokun Zhang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 71 +--
 1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 88e17f5e20b2..bf07aa200030 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -176,9 +176,6 @@ static int vcn_v4_0_sw_init(void *handle)

AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
}
 
-   if (amdgpu_sriov_vf(adev))
-   fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
-
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
@@ -1209,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
return 0;
 }
 
+static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t 
vcn_inst, struct amdgpu_ring *ring_enc)
+{
+   struct amdgpu_vcn_rb_metadata *rb_metadata = NULL;
+   uint8_t *rb_ptr = (uint8_t *)ring_enc->ring;
+
+   rb_ptr += ring_enc->ring_size;
+   rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr;
+
+   memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata));
+   rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata);
+   rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+   rb_metadata->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+   rb_metadata->version = 1;
+   rb_metadata->ring_id = vcn_inst & 0xFF;
+
+   return 0;
+}
+
 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 {
int i;
@@ -1334,11 +1349,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device 
*adev)
rb_enc_addr = ring_enc->gpu_addr;
 
rb_setup->is_rb_enabled_flags |= RB_ENABLED;
-   rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
-   rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
-   rb_setup->rb_size = ring_enc->ring_size / 4;
fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
 
+   if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+   vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
+
+   memset((void *)&rb_setup->rb_info, 0, sizeof(struct 
amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
+   if (!(adev->vcn.harvest_config & (1 << 0))) {
+   rb_setup->rb_info[0].rb_addr_lo = 
lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[0].rb_addr_hi = 
upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[0].rb_size = 
adev->vcn.inst[0].ring_enc[0].ring_size / 4;
+   }
+   if (!(adev->vcn.harvest_config & (1 << 1))) {
+   rb_setup->rb_info[2].rb_addr_lo = 
lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[2].rb_addr_hi = 
upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+   rb_setup->rb_info[2].rb_size = 
adev->vcn.inst[1].ring_enc[0].ring_size / 4;
+   }
+   fw_shared->decouple.is_enabled = 1;
+   fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+   } else {
+   rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+   rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+   rb_setup->rb_size = ring_enc->ring_size / 4;
+   }
+
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
@@ -1810,6 +1844,7 @@ static struct amdgpu_ring_funcs 
vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+   .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata),
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
@@ -2023,16 +2058,20 @@ static int vcn_v4_0_process_interrupt(struct 
amdgpu_device *adev, struct amdgpu_
 {
uint32_t ip_instance;
 
-   switch (entry->client_id) {
-   case SOC15_IH_CLIENTID_VCN:
-   ip_instance = 0;
-   break;
-   case SOC15_IH_CLIENTID_VCN1:
-   ip_instance = 1;
-   break;
-   default:
-   DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
-   return 0;
+   if (amdgpu_sriov_is_vcn_rb_de

[PATCH v4 3/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P3

2023-10-16 Thread Bokun Zhang
- Update VCN header for RB decouple feature
- Add metadata struct, metadata will be placed after each RB

Signed-off-by: Bokun Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 50 +
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 0815c5a97564..0702ffc1d20e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -169,6 +169,9 @@
 #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
 #define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
 #define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
+
+#define MAX_NUM_VCN_RB_SETUP 4
 
 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER   0x0001
 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x0001
@@ -335,22 +338,44 @@ struct amdgpu_fw_shared {
struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
 };
 
+struct amdgpu_vcn_rb_setup_info
+{
+uint32_t  rb_addr_lo;
+uint32_t  rb_addr_hi;
+uint32_t  rb_size;
+};
+
 struct amdgpu_fw_shared_rb_setup {
uint32_t is_rb_enabled_flags;
-   uint32_t rb_addr_lo;
-   uint32_t rb_addr_hi;
-   uint32_t  rb_size;
-   uint32_t  rb4_addr_lo;
-   uint32_t  rb4_addr_hi;
-   uint32_t  rb4_size;
-   uint32_t  reserved[6];
+
+   union {
+   struct {
+   uint32_t rb_addr_lo;
+   uint32_t rb_addr_hi;
+   uint32_t  rb_size;
+   uint32_t  rb4_addr_lo;
+   uint32_t  rb4_addr_hi;
+   uint32_t  rb4_size;
+   uint32_t  reserved[6];
+   };
+
+   struct {
+   struct amdgpu_vcn_rb_setup_info 
rb_info[MAX_NUM_VCN_RB_SETUP];
+   };
+   };
 };
 
+
 struct amdgpu_fw_shared_drm_key_wa {
uint8_t  method;
uint8_t  reserved[3];
 };
 
+struct amdgpu_fw_shared_queue_decouple {
+uint8_t  is_enabled;
+uint8_t  reserved[7];
+};
+
 struct amdgpu_vcn4_fw_shared {
uint32_t present_flag_0;
uint8_t pad[12];
@@ -361,6 +386,8 @@ struct amdgpu_vcn4_fw_shared {
struct amdgpu_fw_shared_rb_setup rb_setup;
struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+   uint8_t pad3[9];
+   struct amdgpu_fw_shared_queue_decouple decouple;
 };
 
 struct amdgpu_vcn_fwlog {
@@ -378,6 +405,15 @@ struct amdgpu_vcn_decode_buffer {
uint32_t pad[30];
 };
 
+struct amdgpu_vcn_rb_metadata {
+   uint32_t size;
+   uint32_t present_flag_0;
+
+   uint8_t version;
+   uint8_t ring_id;
+   uint8_t pad[26];
+};
+
 #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
-- 
2.34.1



[PATCH v4 2/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P2

2023-10-16 Thread Bokun Zhang
- Add function to check if RB decouple is enabled under SRIOV

Signed-off-by: Bokun Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..858ef21ae515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
/* AV1 Support MODE*/
AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+   /* VCN RB decouple */
+   AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
 };
 
 enum AMDGIM_REG_ACCESS_FLAG {
@@ -326,6 +328,8 @@ static inline bool is_virtual_machine(void)
((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
 #define amdgpu_sriov_is_av1_support(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
+   ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
-- 
2.34.1



[PATCH v4 1/4] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV - P1

2023-10-16 Thread Bokun Zhang
- Update SRIOV header with RB decouple flag

Signed-off-by: Bokun Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 104a5ad8397d..51a14f6d93bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags {
uint32_t host_load_ucodes  : 1;
uint32_t host_flr_vramlost : 1;
uint32_t mm_bw_management  : 1;
-   uint32_t pp_one_vf_mode: 1;
+   uint32_t pp_one_vf_mode: 1;
uint32_t reg_indirect_acc  : 1;
uint32_t av1_support   : 1;
-   uint32_t reserved  : 25;
+   uint32_t vcn_rb_decouple   : 1;
+   uint32_t reserved  : 24;
} flags;
uint32_t all;
 };
-- 
2.34.1



Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread Ville Syrjälä
On Mon, Oct 16, 2023 at 05:52:22PM +0300, Pekka Paalanen wrote:
> On Mon, 16 Oct 2023 15:42:16 +0200
> André Almeida  wrote:
> 
> > Hi Pekka,
> > 
> > On 10/16/23 14:18, Pekka Paalanen wrote:
> > > On Mon, 16 Oct 2023 12:52:32 +0200
> > > André Almeida  wrote:
> > >  
> > >> Hi Michel,
> > >>
> > >> On 8/17/23 12:37, Michel Dänzer wrote:  
> > >>> On 8/15/23 20:57, André Almeida wrote:  
> >  From: Pekka Paalanen 
> > 
> >  Specify how the atomic state is maintained between userspace and
> >  kernel, plus the special case for async flips.
> > 
> >  Signed-off-by: Pekka Paalanen 
> >  Signed-off-by: André Almeida   
> > >>> [...]
> > >>> 
> >  +An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
> >  +effectively change only the FB_ID property on any planes. 
> >  No-operation changes
> >  +are ignored as always. [...]  
> > >>> During the hackfest in Brno, it was mentioned that a commit which 
> > >>> re-sets the same FB_ID could actually have an effect with VRR: It could 
> > >>> trigger scanout of the next frame before vertical blank has reached its 
> > >>> maximum duration. Some kind of mechanism is required for this in order 
> > >>> to allow user space to perform low frame rate compensation.
> > >>> 
> > >> Xaver tested this hypothesis by flipping the same fb in a VRR monitor
> > >> and it worked as expected, so this shouldn't be a concern.  
> > > Right, so it must have some effect. It cannot be simply ignored like in
> > > the proposed doc wording. Do we special-case re-setting the same FB_ID
> > > as "not a no-op" or "not ignored" or some other way?  
> > There's an effect in the refresh rate, the image won't change but it 
> > will report that a flip had happened asynchronously so the reported 
> > framerate will be increased. Maybe an additional wording could be like:
> > 
> > Flipping to the same FB_ID will result in an immediate flip as if it were 
> > changing to a different one, with no effect on the image but affecting 
> > the reported frame rate.
> 
> Re-setting FB_ID to its current value is a special case regardless of
> PAGE_FLIP_ASYNC, is it not?

No. The rule has so far been that all side effects are observed
even if you flip to the same fb. And that is one of my annoyances
with this proposal. The rules will now be different for async flips
vs. everything else.

The other issues (mainly relating to hardware where not all planes
support async flips) I've already highlighted in some earlier mail.

-- 
Ville Syrjälä
Intel


Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread André Almeida



On 10/16/23 16:52, Pekka Paalanen wrote:

On Mon, 16 Oct 2023 15:42:16 +0200
André Almeida  wrote:


Hi Pekka,

On 10/16/23 14:18, Pekka Paalanen wrote:

On Mon, 16 Oct 2023 12:52:32 +0200
André Almeida  wrote:
  

Hi Michel,

On 8/17/23 12:37, Michel Dänzer wrote:

On 8/15/23 20:57, André Almeida wrote:

From: Pekka Paalanen 

Specify how the atomic state is maintained between userspace and
kernel, plus the special case for async flips.

Signed-off-by: Pekka Paalanen 
Signed-off-by: André Almeida 

[...]
 

+An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
+effectively change only the FB_ID property on any planes. No-operation changes
+are ignored as always. [...]

During the hackfest in Brno, it was mentioned that a commit which re-sets the 
same FB_ID could actually have an effect with VRR: It could trigger scanout of 
the next frame before vertical blank has reached its maximum duration. Some 
kind of mechanism is required for this in order to allow user space to perform 
low frame rate compensation.
 

Xaver tested this hypothesis by flipping the same fb in a VRR monitor
and it worked as expected, so this shouldn't be a concern.

Right, so it must have some effect. It cannot be simply ignored like in
the proposed doc wording. Do we special-case re-setting the same FB_ID
as "not a no-op" or "not ignored" or some other way?

There's an effect in the refresh rate, the image won't change but it
will report that a flip had happened asynchronously so the reported
framerate will be increased. Maybe an additional wording could be like:

Flipping to the same FB_ID will result in an immediate flip as if it were
changing to a different one, with no effect on the image but affecting
the reported frame rate.

Re-setting FB_ID to its current value is a special case regardless of
PAGE_FLIP_ASYNC, is it not?

So it should be called out somewhere that applies regardless of
PAGE_FLIP_ASYNC. Maybe to the end of the earlier paragraph:


+The changes recorded in an atomic commit apply on top of the current KMS state in
+the kernel. Hence, the complete new KMS state is the complete old KMS state 
with
+the committed property settings done on top. The kernel will try to avoid
+no-operation changes, so it is safe for userspace to send redundant property
+settings.  However, not every situation allows for no-op changes, due to the
+need to acquire locks for some attributes. Userspace needs to be aware that 
some
+redundant information might result in oversynchronization issues.  No-operation
+changes do not count towards actually needed changes, e.g.  setting MODE_ID to 
a
+different blob with identical contents as the current KMS state shall not be a
+modeset on its own.

+As a special exception for VRR needs, explicitly setting FB_ID to its
+current value is not a no-op.

Would that work?


I liked this suggestion, thanks! I'll wrap up a v7


I'd like to try to avoid being more specific about what it does
exactly, because that's not the topic here. Such things can be
documented with the property itself. This is a summary of what is or is
not a no-op or a modeset.


Thanks,
pq


Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread Pekka Paalanen
On Mon, 16 Oct 2023 15:42:16 +0200
André Almeida  wrote:

> Hi Pekka,
> 
> On 10/16/23 14:18, Pekka Paalanen wrote:
> > On Mon, 16 Oct 2023 12:52:32 +0200
> > André Almeida  wrote:
> >  
> >> Hi Michel,
> >>
> >> On 8/17/23 12:37, Michel Dänzer wrote:  
> >>> On 8/15/23 20:57, André Almeida wrote:  
>  From: Pekka Paalanen 
> 
>  Specify how the atomic state is maintained between userspace and
>  kernel, plus the special case for async flips.
> 
>  Signed-off-by: Pekka Paalanen 
>  Signed-off-by: André Almeida   
> >>> [...]
> >>> 
>  +An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
>  +effectively change only the FB_ID property on any planes. No-operation 
>  changes
>  +are ignored as always. [...]  
> >>> During the hackfest in Brno, it was mentioned that a commit which re-sets 
> >>> the same FB_ID could actually have an effect with VRR: It could trigger 
> >>> scanout of the next frame before vertical blank has reached its maximum 
> >>> duration. Some kind of mechanism is required for this in order to allow 
> >>> user space to perform low frame rate compensation.
> >>> 
> >> Xaver tested this hypothesis by flipping the same fb in a VRR monitor
> >> and it worked as expected, so this shouldn't be a concern.  
> > Right, so it must have some effect. It cannot be simply ignored like in
> > the proposed doc wording. Do we special-case re-setting the same FB_ID
> > as "not a no-op" or "not ignored" or some other way?  
> There's an effect in the refresh rate, the image won't change but it 
> will report that a flip had happened asynchronously so the reported 
> framerate will be increased. Maybe an additional wording could be like:
> 
> Flipping to the same FB_ID will result in an immediate flip as if it were 
> changing to a different one, with no effect on the image but affecting 
> the reported frame rate.

Re-setting FB_ID to its current value is a special case regardless of
PAGE_FLIP_ASYNC, is it not?

So it should be called out somewhere that applies regardless of
PAGE_FLIP_ASYNC. Maybe to the end of the earlier paragraph:

> +The changes recorded in an atomic commit apply on top of the current KMS 
> state in
> +the kernel. Hence, the complete new KMS state is the complete old KMS state 
> with
> +the committed property settings done on top. The kernel will try to avoid
> +no-operation changes, so it is safe for userspace to send redundant property
> +settings.  However, not every situation allows for no-op changes, due to the
> +need to acquire locks for some attributes. Userspace needs to be aware that 
> some
> +redundant information might result in oversynchronization issues.  
> No-operation
> +changes do not count towards actually needed changes, e.g.  setting MODE_ID 
> to a
> +different blob with identical contents as the current KMS state shall not be 
> a
> +modeset on its own.

+As a special exception for VRR needs, explicitly setting FB_ID to its
+current value is not a no-op.

Would that work?

I'd like to try to avoid being more specific about what it does
exactly, because that's not the topic here. Such things can be
documented with the property itself. This is a summary of what is or is
not a no-op or a modeset.


Thanks,
pq


pgpe4pXB96TQe.pgp
Description: OpenPGP digital signature


[PATCH V2 2/2] drm/amdgpu: Permit PCIe transfer over links with XGMI

2023-10-16 Thread David Francis
When the CPU is XGMI connected, the PCIe links should
not be enumerated for topology purposes. However, PCIe
transfer should still be a valid option for remote
doorbells and MMIO mappings.

Move the XGMI connection check out of the shared helper
function amdgpu_device_is_peer_accessible and into the
topology path.

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c  | 6 --
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bad2b5577e96..b47cb7f8cfbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5753,9 +5753,7 @@ bool amdgpu_device_is_peer_accessible(struct 
amdgpu_device *adev,
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
-   bool p2p_access =
-   !adev->gmc.xgmi.connected_to_cpu &&
-   !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+   bool p2p_access = !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, 
false) < 0);
 
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4e530791507e..cb64c19482f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1514,11 +1514,13 @@ static int kfd_dev_create_p2p_links(void)
goto next;
 
/* check if node(s) is/are peer accessible in one direction or 
bi-direction */
-   ret = kfd_add_peer_prop(new_dev, dev, i, k);
+   if (!new_dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+   ret = kfd_add_peer_prop(new_dev, dev, i, k);
if (ret < 0)
goto out;
 
-   ret = kfd_add_peer_prop(dev, new_dev, k, i);
+   if (!dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+   ret = kfd_add_peer_prop(dev, new_dev, k, i);
if (ret < 0)
goto out;
 next:
-- 
2.34.1



[PATCH V2 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

2023-10-16 Thread David Francis
dmaunmap can call ttm_bo_validate, which expects the
ttm dma_resv to be held.

Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.

Because the dmaunmap step can now fail, the unmap ioctl UAPI
needs two new arguments. n_dmaunmap_success tracks the number
of devices that have completed dmaunmap. If a device fails
to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
the number of bos on that device that were successfully
dmaunmapped.

This failure can also cause the sync_memory step of the ioctl
to be repeated; it is idempotent, so this should not cause any issues.

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 +++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 14 +--
 include/uapi/linux/kfd_ioctl.h|  2 ++
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3ad8dc523b42..781642871900 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -302,7 +302,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
amdgpu_device *adev,
  struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void 
*drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos);
 int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..cbd6032f3d39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2094,21 +2094,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv, 
uint32_t *num_bos)
 {
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
+   int ret;
+   int i = 0;
 
vm = drm_priv_to_vm(drm_priv);
 
mutex_lock(&mem->lock);
 
list_for_each_entry(entry, &mem->attachments, list) {
-   if (entry->bo_va->base.vm == vm)
-   kfd_mem_dmaunmap_attachment(mem, entry);
-   }
+   if (i >= *num_bos) {
+   ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
+   if (ret) {
+   *num_bos = i;
+   goto out;
+   }
+
+   if (entry->bo_va->base.vm == vm)
+   kfd_mem_dmaunmap_attachment(mem, entry);
 
+   amdgpu_bo_unreserve(entry->bo_va->base.bo);
+   }
+   i++;
+   }
+   *num_bos = 0;
+out:
mutex_unlock(&mem->lock);
+   return ret;
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..a944e255de4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1379,6 +1379,10 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
pr_debug("n_success exceeds n_devices\n");
return -EINVAL;
}
+   if (args->n_dmaunmap_success > args->n_devices) {
+   pr_debug("n_dmaunmap_success exceeds n_devices\n");
+   return -EINVAL;
+   }
 
devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
GFP_KERNEL);
@@ -1434,7 +1438,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
}
 
/* Flush TLBs after waiting for the page table updates to complete */
-   for (i = 0; i < args->n_devices; i++) {
+   for (i = args->n_dmaunmap_success; i < args->n_devices; i++) {
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
@@ -1442,7 +1446,12 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file 
*filep,
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 
/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
-   amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+   err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv, 
&args->n_dmaunmap_bos);
+   if (err) {
+  

[PATCH 2/2] drm/amd/display: Fix stack size issue on DML2

2023-10-16 Thread Rodrigo Siqueira
This commit is the last part of the fix that reduces the stack size in
the DML2 code.

Cc: Stephen Rothwell 
Cc: Alex Deucher 
Cc: Roman Li 
Cc: Chaitanya Dhere 
Fixes: a2815ada8616 ("drm/amd/display: Introduce DML2")
Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/dc/dml2/display_mode_core.c   | 99 ++-
 1 file changed, 54 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c 
b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 06358b7fe38b..851db026f251 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6208,6 +6208,58 @@ static dml_uint_t CalculateMaxVStartup(
return max_vstartup_lines;
 }
 
+static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st 
*mode_lib,
+  struct 
CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
+  dml_uint_t j,
+  dml_uint_t k)
+{
+   CalculatePrefetchSchedule_params->DSCDelay = 
mode_lib->ms.DSCDelayPerState[k];
+   
CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = 
mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+   
CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = 
mode_lib->ms.ip.dppclk_delay_subtotal + 
mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+   
CalculatePrefetchSchedule_params->DPPCLKDelaySCL = 
mode_lib->ms.ip.dppclk_delay_scl;
+   
CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = 
mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+   
CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = 
mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+   
CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = 
mode_lib->ms.ip.dispclk_delay_subtotal;
+   
CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = 
(dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / 
mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+   CalculatePrefetchSchedule_params->OutputFormat 
= mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+   
CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = 
mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+   
CalculatePrefetchSchedule_params->GPUVMPageTableLevels = 
mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+   CalculatePrefetchSchedule_params->GPUVMEnable = 
mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+   CalculatePrefetchSchedule_params->HostVMEnable 
= mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+   
CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = 
mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+   
CalculatePrefetchSchedule_params->HostVMMinPageSize = 
mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+   
CalculatePrefetchSchedule_params->DynamicMetadataEnable = 
mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+   
CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = 
mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+   
CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = 
mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+   
CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = 
mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+   CalculatePrefetchSchedule_params->UrgentLatency 
= mode_lib->ms.UrgLatency;
+   
CalculatePrefetchSchedule_params->UrgentExtraLatency = 
mode_lib->ms.ExtraLatency;
+   CalculatePrefetchSchedule_params->TCalc = 
mode_lib->ms.TimeCalc;
+   
CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = 
mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
+   CalculatePrefetchSchedule_params->MetaRowByte = 
mode_lib->ms.MetaRowBytes[j][k];
+   
CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = 
mode_lib->ms.DPTEBytesPerRow[j][k];
+   
CalculatePrefetchSchedule_params->PrefetchSourceLinesY = 
mode_lib->ms.PrefetchLinesY[j][k];
+   CalculatePrefetchSchedule_params->VInitPreFillY 
= mode_lib->ms.PrefillY[k];
+   CalculatePrefetchSchedule_params->MaxNumSwathY 
= mode_li

[PATCH 0/2] Reduce stack size for DML2

2023-10-16 Thread Rodrigo Siqueira
Stephen discovers a stack size issue when compiling the latest amdgpu
code with allmodconfig. This patchset addresses that issue by splitting
a large function into two smaller parts.

Thanks
Siqueira

Rodrigo Siqueira (2):
  drm/amd/display: Reduce stack size by splitting function
  drm/amd/display: Fix stack size issue on DML2

 .../amd/display/dc/dml2/display_mode_core.c   | 3289 +
 1 file changed, 1653 insertions(+), 1636 deletions(-)

-- 
2.42.0



Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread André Almeida

Hi Pekka,

On 10/16/23 14:18, Pekka Paalanen wrote:

On Mon, 16 Oct 2023 12:52:32 +0200
André Almeida  wrote:


Hi Michel,

On 8/17/23 12:37, Michel Dänzer wrote:

On 8/15/23 20:57, André Almeida wrote:

From: Pekka Paalanen 

Specify how the atomic state is maintained between userspace and
kernel, plus the special case for async flips.

Signed-off-by: Pekka Paalanen 
Signed-off-by: André Almeida 

[...]
  

+An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
+effectively change only the FB_ID property on any planes. No-operation changes
+are ignored as always. [...]

During the hackfest in Brno, it was mentioned that a commit which re-sets the 
same FB_ID could actually have an effect with VRR: It could trigger scanout of 
the next frame before vertical blank has reached its maximum duration. Some 
kind of mechanism is required for this in order to allow user space to perform 
low frame rate compensation.
  

Xaver tested this hypothesis by flipping the same fb on a VRR monitor
and it worked as expected, so this shouldn't be a concern.

Right, so it must have some effect. It cannot be simply ignored like in
the proposed doc wording. Do we special-case re-setting the same FB_ID
as "not a no-op" or "not ignored" or some other way?
There's an effect in the refresh rate, the image won't change but it 
will report that a flip had happened asynchronously so the reported 
framerate will be increased. Maybe an additional wording could be like:


Flipping to the same FB_ID will result in an immediate flip as if it were 
changing to a different one, with no effect on the image but affecting 
the reported frame rate.





Thanks,
pq


RE: [PATCH v3] drm/amd/amdgpu/vcn: Add RB decouple feature under SRIOV

2023-10-16 Thread Liu, Leo
[AMD Official Use Only - General]

> -Original Message-
> From: amd-gfx  On Behalf Of
> Bokun Zhang
> Sent: Friday, October 13, 2023 1:43 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Bokun 
> Subject: [PATCH v3] drm/amd/amdgpu/vcn: Add RB decouple feature under
> SRIOV
>
> - Add code to enable RB decouple feature.
>   This feature is controlled by SRIOV host.
>   Once enabled, it allows VCN0's job to be remapped to
>   VCN1 at hardware level and improves VCN availability
>
Since this feature is only used by SRIOV, we need to make sure the changes 
do not affect the bare-metal (BM) path. Also please split the patch into multiple patches.


> Signed-off-by: Bokun Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c |  3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 52 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h|  4 ++
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  5 +-
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   | 71 -
>  5 files changed, 110 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index f4963330c772..7e8c2dbb34fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -204,6 +204,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device
> *adev)
>   adev->vcn.inst[i].fw_shared.gpu_addr = adev-
> >vcn.inst[i].gpu_addr +
>   bo_size - fw_shared_size;
>
> + /* clean up fw share */
> + memset(adev->vcn.inst[i].fw_shared.cpu_addr, 0,
> fw_shared_size);
> +
>
This should be redundant, since the memory should already be cleared at allocation time.

adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
>
>   if (amdgpu_vcnfw_log) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index 0815c5a97564..6935ab74f481 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -169,6 +169,9 @@
>  #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)  #define
> AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)  #define
> AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
> +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
> +
> +#define MAX_NUM_VCN_RB_SETUP 4
>
>  #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x0001
>  #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER   0x0001
> @@ -335,22 +338,46 @@ struct amdgpu_fw_shared {
>   struct amdgpu_fw_shared_smu_interface_info smu_interface_info;  };
>
> +struct amdgpu_vcn_rb_setup_info
> +{
> +uint32_t  rb_addr_lo;
> +uint32_t  rb_addr_hi;
> +uint32_t  rb_size;
> +};
> +
>  struct amdgpu_fw_shared_rb_setup {
>   uint32_t is_rb_enabled_flags;
> - uint32_t rb_addr_lo;
> - uint32_t rb_addr_hi;
> - uint32_t  rb_size;
> - uint32_t  rb4_addr_lo;
> - uint32_t  rb4_addr_hi;
> - uint32_t  rb4_size;
> - uint32_t  reserved[6];
> +
> + union {
> + // 12 DWords

This can be removed.

> + struct {
> + uint32_t rb_addr_lo;
> + uint32_t rb_addr_hi;
> + uint32_t  rb_size;
> + uint32_t  rb4_addr_lo;
> + uint32_t  rb4_addr_hi;
> + uint32_t  rb4_size;
> + uint32_t  reserved[6];
> + };
> +
> + // 12 DWords

Same here.

Regards,
Leo

> + struct {
> + struct amdgpu_vcn_rb_setup_info
> rb_info[MAX_NUM_VCN_RB_SETUP];
> + };
> + };
>  };
>
> +
>  struct amdgpu_fw_shared_drm_key_wa {
>   uint8_t  method;
>   uint8_t  reserved[3];
>  };
>
> +struct amdgpu_fw_shared_queue_decouple {
> +uint8_t  is_enabled;
> +uint8_t  reserved[7];
> +};
> +
>  struct amdgpu_vcn4_fw_shared {
>   uint32_t present_flag_0;
>   uint8_t pad[12];
> @@ -361,6 +388,8 @@ struct amdgpu_vcn4_fw_shared {
>   struct amdgpu_fw_shared_rb_setup rb_setup;
>   struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
>   struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
> + uint8_t pad3[9];
> + struct amdgpu_fw_shared_queue_decouple decouple;
>  };
>
>  struct amdgpu_vcn_fwlog {
> @@ -378,6 +407,15 @@ struct amdgpu_vcn_decode_buffer {
>   uint32_t pad[30];
>  };
>
> +struct amdgpu_vcn_rb_metadata {
> + uint32_t size;
> + uint32_t present_flag_0;
> +
> + uint8_t version;
> + uint8_t ring_id;
> + uint8_t pad[26];
> +};
> +
>  #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80  #define
> VCN_BLOCK_DECODE_DISABLE_MASK 0x40  #define
> VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index fabb83e9d9ae..858ef21ae515 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG {

Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread Pekka Paalanen
On Mon, 16 Oct 2023 12:52:32 +0200
André Almeida  wrote:

> Hi Michel,
> 
> On 8/17/23 12:37, Michel Dänzer wrote:
> > On 8/15/23 20:57, André Almeida wrote:  
> >> From: Pekka Paalanen 
> >>
> >> Specify how the atomic state is maintained between userspace and
> >> kernel, plus the special case for async flips.
> >>
> >> Signed-off-by: Pekka Paalanen 
> >> Signed-off-by: André Almeida   
> > [...]
> >  
> >> +An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
> >> +effectively change only the FB_ID property on any planes. No-operation 
> >> changes
> >> +are ignored as always. [...]  
> > During the hackfest in Brno, it was mentioned that a commit which re-sets 
> > the same FB_ID could actually have an effect with VRR: It could trigger 
> > scanout of the next frame before vertical blank has reached its maximum 
> > duration. Some kind of mechanism is required for this in order to allow 
> > user space to perform low frame rate compensation.
> >  
> Xaver tested this hypothesis in a flipping the same fb in a VRR monitor 
> and it worked as expected, so this shouldn't be a concern.

Right, so it must have some effect. It cannot be simply ignored like in
the proposed doc wording. Do we special-case re-setting the same FB_ID
as "not a no-op" or "not ignored" or some other way?


Thanks,
pq


pgpcvfTtErhDd.pgp
Description: OpenPGP digital signature


Re: [PATCH v6 6/6] drm/doc: Define KMS atomic state set

2023-10-16 Thread André Almeida

Hi Michel,

On 8/17/23 12:37, Michel Dänzer wrote:

On 8/15/23 20:57, André Almeida wrote:

From: Pekka Paalanen 

Specify how the atomic state is maintained between userspace and
kernel, plus the special case for async flips.

Signed-off-by: Pekka Paalanen 
Signed-off-by: André Almeida 

[...]


+An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
+effectively change only the FB_ID property on any planes. No-operation changes
+are ignored as always. [...]

During the hackfest in Brno, it was mentioned that a commit which re-sets the 
same FB_ID could actually have an effect with VRR: It could trigger scanout of 
the next frame before vertical blank has reached its maximum duration. Some 
kind of mechanism is required for this in order to allow user space to perform 
low frame rate compensation.

Xaver tested this hypothesis in a flipping the same fb in a VRR monitor 
and it worked as expected, so this shouldn't be a concern.


Thanks,
    André




RE: [PATCH] drm/amdgpu/mes11: remove aggregated doorbell code

2023-10-16 Thread Xiao, Jack
[AMD Official Use Only - General]

If the aggregated doorbell is not intended for user queue oversubscription, 
it's ok to remove it.

Reviewed-by: Jack Xiao 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Friday, October 13, 2023 9:53 PM
To: Deucher, Alexander 
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu/mes11: remove aggregated doorbell code

Ping?

On Wed, Oct 11, 2023 at 2:27 PM Alex Deucher  wrote:
>
> It's not enabled in hardware so the code is dead.
> Remove it.
>
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 86
> +-  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c |
> 56 -  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 83
> -
>  3 files changed, 40 insertions(+), 185 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 27b224b0688a..91c07ab4f14e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -5170,45 +5170,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct
> amdgpu_ring *ring)  static void gfx_v11_0_ring_set_wptr_gfx(struct
> amdgpu_ring *ring)  {
> struct amdgpu_device *adev = ring->adev;
> -   uint32_t *wptr_saved;
> -   uint32_t *is_queue_unmap;
> -   uint64_t aggregated_db_index;
> -   uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
> -   uint64_t wptr_tmp;
>
> -   if (ring->is_mes_queue) {
> -   wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
> -   is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
> - sizeof(uint32_t));
> -   aggregated_db_index =
> -   amdgpu_mes_get_aggregated_doorbell_index(adev,
> -
> ring->hw_prio);
> -
> -   wptr_tmp = ring->wptr & ring->buf_mask;
> -   atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
> -   *wptr_saved = wptr_tmp;
> -   /* assume doorbell always being used by mes mapped queue */
> -   if (*is_queue_unmap) {
> -   WDOORBELL64(aggregated_db_index, wptr_tmp);
> -   WDOORBELL64(ring->doorbell_index, wptr_tmp);
> -   } else {
> -   WDOORBELL64(ring->doorbell_index, wptr_tmp);
> -
> -   if (*is_queue_unmap)
> -   WDOORBELL64(aggregated_db_index, wptr_tmp);
> -   }
> +   if (ring->use_doorbell) {
> +   /* XXX check if swapping is necessary on BE */
> +   atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> +ring->wptr);
> +   WDOORBELL64(ring->doorbell_index, ring->wptr);
> } else {
> -   if (ring->use_doorbell) {
> -   /* XXX check if swapping is necessary on BE */
> -   atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -ring->wptr);
> -   WDOORBELL64(ring->doorbell_index, ring->wptr);
> -   } else {
> -   WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
> -lower_32_bits(ring->wptr));
> -   WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
> -upper_32_bits(ring->wptr));
> -   }
> +   WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
> +lower_32_bits(ring->wptr));
> +   WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
> +upper_32_bits(ring->wptr));
> }
>  }
>
> @@ -5233,42 +5205,14 @@ static u64
> gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)  static void
> gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)  {
> struct amdgpu_device *adev = ring->adev;
> -   uint32_t *wptr_saved;
> -   uint32_t *is_queue_unmap;
> -   uint64_t aggregated_db_index;
> -   uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
> -   uint64_t wptr_tmp;
>
> -   if (ring->is_mes_queue) {
> -   wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
> -   is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
> - sizeof(uint32_t));
> -   aggregated_db_index =
> -   amdgpu_mes_get_aggregated_doorbell_index(adev,
> -
> ring->hw_prio);
> -
> -   wptr_tmp = ring->wptr & ring->buf_mask;
> -   atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
> -   *wptr_saved = wptr_tmp;
> -   /* assume doorbell always used by mes mapped queue */
> -   if (*is_queue_unmap) {
> -