[PATCH 2/4] drm/amd: Align SMU11 SMU_MSG_OverridePcieParameters implementation with SMU13

2023-07-07 Thread Mario Limonciello
SMU13 overrides dynamic PCIe lane width and dynamic speed switching when
on certain hosts. commit 87c617c72628 ("drm/amd/pm: conditionally disable
pcie lane switching for some sienna_cichlid SKUs") worked around this
issue by applying limits to certain SKUs, but the same fundamental
problem with those hosts affects all SMU11 implementations as well, so
align the SMU11 and SMU13 driver handling.

Signed-off-by: Mario Limonciello 
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 89 ---
 1 file changed, 18 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 8fe2e1716da4..f6599c00a6fd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2077,89 +2077,36 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
 }
 
-static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu,
-						      uint32_t *gen_speed_override,
-						      uint32_t *lane_width_override)
-{
-   struct amdgpu_device *adev = smu->adev;
-
-   *gen_speed_override = 0xff;
-   *lane_width_override = 0xff;
-
-   switch (adev->pdev->device) {
-   case 0x73A0:
-   case 0x73A1:
-   case 0x73A2:
-   case 0x73A3:
-   case 0x73AB:
-   case 0x73AE:
-   /* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */
-   *lane_width_override = 6;
-   break;
-   case 0x73E0:
-   case 0x73E1:
-   case 0x73E3:
-   *lane_width_override = 4;
-   break;
-   case 0x7420:
-   case 0x7421:
-   case 0x7422:
-   case 0x7423:
-   case 0x7424:
-   *lane_width_override = 3;
-   break;
-   default:
-   break;
-   }
-}
-
-#define MAX(a, b)  ((a) > (b) ? (a) : (b))
-
 static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap)
 {
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
-   uint32_t gen_speed_override, lane_width_override;
-   uint8_t *table_member1, *table_member2;
-   uint32_t min_gen_speed, max_gen_speed;
-   uint32_t min_lane_width, max_lane_width;
-   uint32_t smu_pcie_arg;
+   u32 smu_pcie_arg;
int ret, i;
 
-   GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
-   GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
-
-   sienna_cichlid_get_override_pcie_settings(smu,
-					      &gen_speed_override,
-					      &lane_width_override);
+   /* PCIE gen speed and lane width override */
+   if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+   if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
+		pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
 
-   /* PCIE gen speed override */
-   if (gen_speed_override != 0xff) {
-   min_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
-   max_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
-   } else {
-   min_gen_speed = MAX(0, table_member1[0]);
-   max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
-   min_gen_speed = min_gen_speed > max_gen_speed ?
-   max_gen_speed : min_gen_speed;
-   }
-   pcie_table->pcie_gen[0] = min_gen_speed;
-   pcie_table->pcie_gen[1] = max_gen_speed;
+	if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
+		pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
 
-   /* PCIE lane width override */
-   if (lane_width_override != 0xff) {
-   min_lane_width = MIN(pcie_width_cap, lane_width_override);
-   max_lane_width = MIN(pcie_width_cap, lane_width_override);
+   /* Force all levels to use the same settings */
+   for (i = 0; i < NUM_LINK_LEVELS; i++) {
+   pcie_table->pcie_gen[i] = pcie_gen_cap;
+   pcie_table->pcie_lane[i] = pcie_width_cap;
+   }
} else {
-   min_lane_width = MAX(1, table_member2[0]);
-   max_lane_width = MIN(pcie_width_cap, table_member2[1]);
-   min_lane_width = min_lane_width > max_lane_width ?
-max_lane_width : min_lane_width;
+   for (i = 0; i < NUM_LINK_LEVELS; i++) {
+   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+   
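[The archived diff is truncated at this point. For orientation only: based on the SMU13 version this patch aligns with (see smu_v13_0_update_pcie_parameters() quoted later in this digest), the function presumably ends by programming each link level, roughly along these lines. This is a hedged sketch, not the literal missing hunk:]

	for (i = 0; i < NUM_LINK_LEVELS; i++) {
		/* pack level index (23:16), gen speed (15:8), lane width (7:0) */
		smu_pcie_arg = i << 16;
		smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
		smu_pcie_arg |= pcie_table->pcie_lane[i];

		ret = smu_cmn_send_smc_msg_with_param(smu,
						      SMU_MSG_OverridePcieParameters,
						      smu_pcie_arg,
						      NULL);
		if (ret)
			return ret;
	}

	return 0;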

[PATCH 3/4] drm/amd: Use amdgpu_device_pcie_dynamic_switching_supported() for SMU7

2023-07-07 Thread Mario Limonciello
SMU7 checks whether the dGPU is inserted into a Rocket Lake system and,
if so, turns off DPM.  Extend this check to all systems that have
problems with dynamic switching by using the
amdgpu_device_pcie_dynamic_switching_supported() helper.

Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c| 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 6841a4bce186..1cb402264497 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -1798,17 +1798,6 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
return result;
 }
 
-static bool intel_core_rkl_chk(void)
-{
-#if IS_ENABLED(CONFIG_X86_64)
-   struct cpuinfo_x86 *c = &cpu_data(0);
-
-   return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE);
-#else
-   return false;
-#endif
-}
-
 static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -1835,7 +1824,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
	data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
	data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true;
data->pcie_dpm_key_disabled =
-		intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
+   !amdgpu_device_pcie_dynamic_switching_supported() ||
+   !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
/* need to set voltage control types before EVV patching */
data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE;
data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE;
-- 
2.34.1



[PATCH 4/4] drm/amd: Drop amdgpu_device_aspm_support_quirk()

2023-07-07 Thread Mario Limonciello
NV and VI currently set up a quirk to not enable ASPM on Alder Lake
systems, but the issue appears to be tied to hosts without support
for dynamic speed switching. Migrate both of these over to use
amdgpu_device_pcie_dynamic_switching_supported() instead and drop
amdgpu_device_aspm_support_quirk().

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ---
 drivers/gpu/drm/amd/amdgpu/nv.c|  5 -
 drivers/gpu/drm/amd/amdgpu/vi.c|  5 -
 4 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 813713f42d5e..6ecf42c4c970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1315,7 +1315,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
 bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
-bool amdgpu_device_aspm_support_quirk(void);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
  u64 num_vis_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7314529553f6..a9e757f899f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1505,17 +1505,6 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
return pcie_aspm_enabled(adev->pdev);
 }
 
-bool amdgpu_device_aspm_support_quirk(void)
-{
-#if IS_ENABLED(CONFIG_X86)
-   struct cpuinfo_x86 *c = &cpu_data(0);
-
-   return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
-#else
-   return true;
-#endif
-}
-
 /* if we get transitioned to only one device, take VGA back */
 /**
  * amdgpu_device_vga_set_decode - enable/disable vga decode
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 51523b27a186..71bc5b2f36cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -527,7 +527,10 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
 
 static void nv_program_aspm(struct amdgpu_device *adev)
 {
-	if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+   if (!amdgpu_device_should_use_aspm(adev))
+   return;
+
+   if (!amdgpu_device_pcie_dynamic_switching_supported())
return;
 
if (!(adev->flags & AMD_IS_APU) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6a8494f98d3e..f44c78e69b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1124,7 +1124,10 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
 
-	if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+   if (!amdgpu_device_should_use_aspm(adev))
+   return;
+
+   if (!amdgpu_device_pcie_dynamic_switching_supported())
return;
 
if (adev->flags & AMD_IS_APU ||
-- 
2.34.1



[PATCH 0/4] Extend amdgpu_device_pcie_dynamic_switching_supported()

2023-07-07 Thread Mario Limonciello
amdgpu_device_pcie_dynamic_switching_supported() currently only covers
SMU13. It sets up the PCIe tables so that DPM effectively can't change
speed or lane width dynamically on problematic hosts.

Earlier quirks to SMU11 applied a similar solution by looking at specific
PCI IDs typically paired with problematic products.

Even earlier, dGPUs used in Intel Alder Lake and Rocket Lake systems
adopted similar solutions that would turn off DPM.

These all come down to the same fundamental problem: Intel hosts can't
handle these features. There is nothing to stop someone from taking a
Navi14, putting it into a Sapphire Rapids system, and hitting the
same problem that was observed when it was placed into an Alder
Lake-S system.

Because of this, drop all the specific Intel model + AMD dGPU matching
across the driver and instead match ALL Intel hosts to apply these
quirks of setting PCIe override parameters or turning off DPM.

If a new Intel host does work well with dynamic speed switching, we
can later adjust amdgpu_device_pcie_dynamic_switching_supported() to
have a switch/case where we allowlist those hosts, or enumerate all
the broken ones and denylist them, as sketched below.
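
For illustration, such an allowlist could look roughly like the following
sketch (hypothetical: the switch cases are made up, only the vendor check
exists today):

bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_vendor != X86_VENDOR_INTEL)
		return true;

	switch (c->x86_model) {
	/* hypothetical: Intel models confirmed to handle DPM switching
	 * case INTEL_FAM6_SOME_FUTURE_MODEL:
	 *	return true;
	 */
	default:
		return false;
	}
#else
	return true;
#endif
}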

Mario Limonciello (4):
  drm/amd: Move helper for dynamic speed switch check out of smu13
  drm/amd: Align SMU11 SMU_MSG_OverridePcieParameters implementation
with SMU13
  drm/amd: Use amdgpu_device_pcie_dynamic_switching_supported() for SMU7
  drm/amd: Drop amdgpu_device_aspm_support_quirk()

 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 30 ---
 drivers/gpu/drm/amd/amdgpu/nv.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vi.c   |  5 +-
 .../drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c   | 14 +--
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 89 ---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 21 +
 7 files changed, 49 insertions(+), 117 deletions(-)

-- 
2.34.1



[PATCH 1/4] drm/amd: Move helper for dynamic speed switch check out of smu13

2023-07-07 Thread Mario Limonciello
This helper is used for checking whether the connected host supports
the feature; it can be moved into generic code to be used by other
SMU implementations as well.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 19 +
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 21 +--
 3 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc4dc1446a19..813713f42d5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1313,6 +1313,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
 bool amdgpu_device_aspm_support_quirk(void);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fcf5f07c4775..7314529553f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1461,6 +1461,25 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
 }
 
+/*
+ * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
+ * speed switching. Until we have confirmation from Intel that a specific host
+ * supports it, it's safer that we keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+bool amdgpu_device_pcie_dynamic_switching_supported(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+   struct cpuinfo_x86 *c = &cpu_data(0);
+
+   if (c->x86_vendor == X86_VENDOR_INTEL)
+   return false;
+#endif
+   return true;
+}
+
 /**
  * amdgpu_device_should_use_aspm - check if the device should program ASPM
  *
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index cf7e729020ab..9b62b45ebb7f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2425,25 +2425,6 @@ int smu_v13_0_mode1_reset(struct smu_context *smu)
return ret;
 }
 
-/*
- * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
- * speed switching. Until we have confirmation from Intel that a specific host
- * supports it, it's safer that we keep it disabled for all.
- *
- * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
- */
-static bool smu_v13_0_is_pcie_dynamic_switching_supported(void)
-{
-#if IS_ENABLED(CONFIG_X86)
-   struct cpuinfo_x86 *c = &cpu_data(0);
-
-   if (c->x86_vendor == X86_VENDOR_INTEL)
-   return false;
-#endif
-   return true;
-}
-
 int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap)
@@ -2455,7 +2436,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
uint32_t smu_pcie_arg;
int ret, i;
 
-   if (!smu_v13_0_is_pcie_dynamic_switching_supported()) {
+   if (!amdgpu_device_pcie_dynamic_switching_supported()) {
if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap)
pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1];
 
-- 
2.34.1



[Patch v2] drm/ttm: Use init_on_free to delay release TTM BOs

2023-07-07 Thread Rajneesh Bhardwaj
Delay releasing TTM BOs when the kernel default setting is init_on_free.
This offloads the overhead of clearing the system memory to the work
item and potentially a different CPU. This could be very beneficial when
the application does a lot of malloc/free style allocations of system
memory.

Reviewed-by: Christian König 
Signed-off-by: Rajneesh Bhardwaj 
---
Changes in v2:
- Updated commit message as per Christian's feedback

 drivers/gpu/drm/ttm/ttm_bo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 326a3d13a829..bd2e7e4f497a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -347,6 +347,7 @@ static void ttm_bo_release(struct kref *kref)
 
if (!dma_resv_test_signaled(bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP) ||
+   (want_init_on_free() && (bo->ttm != NULL)) ||
!dma_resv_trylock(bo->base.resv)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
-- 
2.17.1
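
For context (this note and sketch are not part of the patch):
want_init_on_free() is true when the kernel zeroes memory on free
(init_on_free=1 on the command line, or CONFIG_INIT_ON_FREE_DEFAULT_ON=y).
The hunk above routes that case through the delayed-destroy path, so the
zeroing of the BO's backing pages happens from the worker rather than in
the caller. A sketch of the resulting decision in ttm_bo_release():

	/* sketch: conditions that resurrect the BO for delayed destroy */
	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
	    (want_init_on_free() && bo->ttm) ||	/* defer freeing (and zeroing) */
	    !dma_resv_trylock(bo->base.resv)) {
		ttm_bo_flush_all_fences(bo);
		/* queued to a workqueue; pages are freed there, and zeroed
		 * when init_on_free is enabled, off the application's CPU */
	}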



Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
Yeah, that's an unfortunate mismatch.
Leave it then.  We can always clean it up later if there's a strong preference
to do so.

Jon


From: Huang, JinHuiEric 
Sent: Friday, July 7, 2023 8:25 PM
To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

Thanks for your review. The prefix name change would be inconsistent: the new
functions' prefix would differ from the existing functions' prefix. Are you
sure it doesn't matter?

Regards,
Eric

On 2023-07-07 19:52, Kim, Jonathan wrote:
I would change the static prefix names from kgd_gfx_ to kgd_gc_ to match file 
name and specify it as the target GC version.

With that fixed and assuming grace period instance fix ups will follow after, 
this patch and series is:

Reviewed-by: Jonathan Kim 




Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Eric Huang
Thanks for your review. The prefix name change would be inconsistent:
the new functions' prefix would differ from the existing functions'
prefix. Are you sure it doesn't matter?


Regards,
Eric

On 2023-07-07 19:52, Kim, Jonathan wrote:
I would change the static prefix names from kgd_gfx_ to kgd_gc_ to 
match file name and specify it as the target GC version.


With that fixed and assuming grace period instance fix ups will follow 
after, this patch and series is:


Reviewed-by: Jonathan Kim 




Re: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
I would change the static prefix names from kgd_gfx_ to kgd_gc_ to match file 
name and specify it as the target GC version.

With that fixed and assuming grace period instance fix ups will follow after, 
this patch and series is:

Reviewed-by: Jonathan Kim 



From: Huang, JinHuiEric 
Sent: Friday, July 7, 2023 1:46 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Kim, Jonathan ; Kim, Jonathan ; Huang, JinHuiEric 
Subject: [PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

From: Jonathan Kim 

Implement the same behavior as GC v9.4.2, plus the differences required
by the GC v9.4.3 HW spec, i.e. the xcc instance.

Signed-off-by: Jonathan Kim 
Signed-off-by: Eric Huang 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  27 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 166 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   3 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   3 +-
 10 files changed, 213 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..a06a99c5d311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd_arcturus.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
 #include 
@@ -36,7 +37,7 @@
  * initialize the debug mode registers after it has disabled GFX off during the
  * debug session.
  */
-static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
 bool restore_dbg_registers,
 uint32_t vmid)
 {
@@ -107,7 +108,7 @@ static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
 return data;
 }

-static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
 uint8_t wave_launch_mode,
 uint32_t vmid)
 {
@@ -125,7 +126,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 uint32_t watch_address_mask,
 uint32_t watch_id,
 uint32_t watch_mode,
-   uint32_t debug_vmid)
+   uint32_t debug_vmid,
+   uint32_t inst)
 {
 uint32_t watch_address_high;
 uint32_t watch_address_low;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index ..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+   bool restore_dbg_registers,
+   uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+   uint8_t 

[PATCH v5 6/6] drm/doc: Define KMS atomic state set

2023-07-07 Thread André Almeida
From: Pekka Paalanen 

Specify how the atomic state is maintained between userspace and
kernel, plus the special case for async flips.

Signed-off-by: Pekka Paalanen 
Signed-off-by: André Almeida 
---
v4: total rework by Pekka
---
 Documentation/gpu/drm-uapi.rst | 41 ++
 1 file changed, 41 insertions(+)

diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index 65fb3036a580..6a1662c08901 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -486,3 +486,44 @@ and the CRTC index is its position in this array.
 
 .. kernel-doc:: include/uapi/drm/drm_mode.h
:internal:
+
+KMS atomic state
+
+
+An atomic commit can change multiple KMS properties in an atomic fashion,
+without ever applying intermediate or partial state changes.  Either the whole
+commit succeeds or fails, and it will never be applied partially. This is the
+fundamental improvement of the atomic API over the older non-atomic API which 
is
+referred to as the "legacy API".  Applying intermediate state could 
unexpectedly
+fail, cause visible glitches, or delay reaching the final state.
+
+An atomic commit can be flagged with DRM_MODE_ATOMIC_TEST_ONLY, which means the
+complete state change is validated but not applied.  Userspace should use this
+flag to validate any state change before asking to apply it. If validation fails
+for any reason, userspace should attempt to fall back to another, perhaps
+simpler, final state.  This allows userspace to probe for various configurations
+without causing visible glitches on screen and without the need to undo a
+probing change.
+
+The changes recorded in an atomic commit apply on top of the current KMS state in
+the kernel. Hence, the complete new KMS state is the complete old KMS state with
+the committed property settings done on top. The kernel will automatically avoid
+no-operation changes, so it is safe and even expected for userspace to send
+redundant property settings.  No-operation changes do not count towards actually
+needed changes, e.g.  setting MODE_ID to a different blob with identical
+contents as the current KMS state shall not be a modeset on its own.
+
+A "modeset" is a change in KMS state that might enable, disable, or temporarily
+disrupt the emitted video signal, possibly causing visible glitches on screen. A
+modeset may also take considerably more time to complete than other kinds of
+changes, and the video sink might also need time to adapt to the new signal
+properties. Therefore a modeset must be explicitly allowed with the flag
+DRM_MODE_ATOMIC_ALLOW_MODESET.  This in combination with
+DRM_MODE_ATOMIC_TEST_ONLY allows userspace to determine if a state change is
+likely to cause visible disruption on screen and avoid such changes when end
+users do not expect them.
+
+An atomic commit with the flag DRM_MODE_PAGE_FLIP_ASYNC is allowed to
+effectively change only the FB_ID property on any planes. No-operation changes
+are ignored as always. Changing any other property will cause the commit to be
+rejected.
-- 
2.41.0
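
To make the TEST_ONLY / ALLOW_MODESET flow described above concrete, here is
a minimal user-space sketch using libdrm (assumptions: fd is a DRM master
and the object/property IDs were looked up beforehand; this is illustrative,
not part of the patch):

	drmModeAtomicReq *req = drmModeAtomicAlloc();

	/* stage changes on top of the current KMS state */
	drmModeAtomicAddProperty(req, plane_id, fb_id_prop_id, new_fb_id);

	/* validate first, allowing a modeset, without applying anything */
	int ret = drmModeAtomicCommit(fd, req,
				      DRM_MODE_ATOMIC_TEST_ONLY |
				      DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
	if (ret == 0)
		/* validated: apply for real */
		ret = drmModeAtomicCommit(fd, req,
					  DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
	else
		/* fall back to another, perhaps simpler, final state */;

	drmModeAtomicFree(req);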



[PATCH v5 5/6] drm: Refuse to async flip with atomic prop changes

2023-07-07 Thread André Almeida
Given that prop changes may lead to modesetting, which would defeat the
fast path of the async flip, refuse any atomic prop change for async
flips in the atomic API. The only exceptions are the framebuffer ID to
flip to and the mode ID, which could be referring to an identical mode.

Signed-off-by: André Almeida 
---
v4: new patch
---
 drivers/gpu/drm/drm_atomic_helper.c |  5 +++
 drivers/gpu/drm/drm_atomic_uapi.c   | 52 +++--
 drivers/gpu/drm/drm_crtc_internal.h |  2 +-
 drivers/gpu/drm/drm_mode_object.c   |  2 +-
 4 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 2c2c9caf0be5..1e2973f0e1f6 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -629,6 +629,11 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
if (!drm_mode_equal(&old_crtc_state->mode, &new_crtc_state->mode)) {
+   if (new_crtc_state->async_flip) {
+			drm_dbg_atomic(dev, "[CRTC:%d:%s] no mode changes allowed during async flip\n",
+  crtc->base.id, crtc->name);
+   return -EINVAL;
+   }
drm_dbg_atomic(dev, "[CRTC:%d:%s] mode changed\n",
   crtc->base.id, crtc->name);
new_crtc_state->mode_changed = true;
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index dfd4cf7169df..536c21f53b5f 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -972,13 +972,28 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state,
return ret;
 }
 
+static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value,
+struct drm_property *prop)
+{
+   if (ret != 0 || old_val != prop_value) {
+   drm_dbg_atomic(prop->dev,
+  "[PROP:%d:%s] No prop can be changed during 
async flip\n",
+  prop->base.id, prop->name);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 int drm_atomic_set_property(struct drm_atomic_state *state,
struct drm_file *file_priv,
struct drm_mode_object *obj,
struct drm_property *prop,
-   uint64_t prop_value)
+   uint64_t prop_value,
+   bool async_flip)
 {
struct drm_mode_object *ref;
+   uint64_t old_val;
int ret;
 
if (!drm_property_change_valid_get(prop, prop_value, &ref))
@@ -995,6 +1010,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
break;
}
 
+   if (async_flip) {
+		ret = drm_atomic_connector_get_property(connector, connector_state,
+							prop, &old_val);
+		ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
+   break;
+   }
+
ret = drm_atomic_connector_set_property(connector,
connector_state, file_priv,
prop, prop_value);
@@ -1003,6 +1025,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
case DRM_MODE_OBJECT_CRTC: {
struct drm_crtc *crtc = obj_to_crtc(obj);
struct drm_crtc_state *crtc_state;
+		struct drm_mode_config *config = &crtc->dev->mode_config;
 
crtc_state = drm_atomic_get_crtc_state(state, crtc);
if (IS_ERR(crtc_state)) {
@@ -1010,6 +1033,18 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
break;
}
 
+   /*
+* We allow mode_id changes here for async flips, because we
+* check later on drm_atomic_helper_check_modeset() callers if
+* there are modeset changes or they are equal
+*/
+   if (async_flip && prop != config->prop_mode_id) {
+			ret = drm_atomic_crtc_get_property(crtc, crtc_state,
+							   prop, &old_val);
+			ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
+   break;
+   }
+
ret = drm_atomic_crtc_set_property(crtc,
crtc_state, prop, prop_value);
break;
@@ -1017,6 +1052,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
case DRM_MODE_OBJECT_PLANE: {
struct drm_plane *plane = 

[PATCH v5 4/6] amd/display: indicate support for atomic async page-flips on DC

2023-07-07 Thread André Almeida
From: Simon Ser 

amdgpu_dm_commit_planes() already sets the flip_immediate flag for
async page-flips. This flag is used to set the UNP_FLIP_CONTROL
register. Thus, no additional change is required to handle async
page-flips with the atomic uAPI.

Signed-off-by: Simon Ser 
Reviewed-by: André Almeida 
Reviewed-by: Alex Deucher 
Signed-off-by: André Almeida 
---
v4: no changes
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 258461826140..7acd73e5004f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3970,7 +3970,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
-	adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true;
 
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state)
-- 
2.41.0



[PATCH v5 3/6] drm: introduce drm_mode_config.atomic_async_page_flip_not_supported

2023-07-07 Thread André Almeida
From: Simon Ser 

This new field indicates whether the driver has the necessary logic
to support async page-flips via the atomic uAPI. This is leveraged by
the next commit to allow user-space to use this functionality.

All atomic drivers setting drm_mode_config.async_page_flip are updated
to also set drm_mode_config.atomic_async_page_flip_not_supported. We
will gradually check and update these drivers to properly handle
drm_crtc_state.async_flip in their atomic logic.

The goal of this negative flag is the same as
fb_modifiers_not_supported: we want to eventually get rid of all
drivers missing atomic support for async flips. New drivers should not
set this flag, instead they should support atomic async flips (if
they support async flips at all). IOW, we don't want more drivers
with async flip support for legacy but not atomic.

Signed-off-by: Simon Ser 
Reviewed-by: André Almeida 
Reviewed-by: Alex Deucher 
Signed-off-by: André Almeida 
---
v4: no changes
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  1 +
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c  |  1 +
 drivers/gpu/drm/i915/display/intel_display.c  |  1 +
 drivers/gpu/drm/nouveau/nouveau_display.c |  1 +
 include/drm/drm_mode_config.h | 11 +++
 5 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7acd73e5004f..258461826140 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3970,6 +3970,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
+	adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true;
 
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state)
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 29603561d501..8afb22b1e730 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -639,6 +639,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev)
dev->mode_config.max_height = dc->desc->max_height;
dev->mode_config.funcs = &mode_config_funcs;
dev->mode_config.async_page_flip = true;
+   dev->mode_config.atomic_async_page_flip_not_supported = true;
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 0aae9a1eb3d5..a5c503ca9168 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -8318,6 +8318,7 @@ static void intel_mode_config_init(struct drm_i915_private *i915)
mode_config->helper_private = &intel_mode_config_funcs;
 
mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915);
+   mode_config->atomic_async_page_flip_not_supported = true;
 
/*
 * Maximum framebuffer dimensions, chosen to match
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index ec3487fc..f497dcd9e22f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -709,6 +709,7 @@ nouveau_display_create(struct drm_device *dev)
dev->mode_config.async_page_flip = false;
else
dev->mode_config.async_page_flip = true;
+   dev->mode_config.atomic_async_page_flip_not_supported = true;
 
drm_kms_helper_poll_init(dev);
drm_kms_helper_poll_disable(dev);
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 973119a9176b..47b005671e6a 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -918,6 +918,17 @@ struct drm_mode_config {
 */
bool async_page_flip;
 
+   /**
+* @atomic_async_page_flip_not_supported:
+*
+* If true, the driver does not support async page-flips with the
+* atomic uAPI. This is only used by old drivers which haven't yet
+* accommodated for &drm_crtc_state.async_flip in their atomic logic,
+* even if they have &drm_mode_config.async_page_flip set to true.
+* New drivers shall not set this flag.
+*/
+   bool atomic_async_page_flip_not_supported;
+
/**
 * @fb_modifiers_not_supported:
 *
-- 
2.41.0



[PATCH v5 2/6] drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP

2023-07-07 Thread André Almeida
From: Simon Ser 

This new kernel capability indicates whether async page-flips are
supported via the atomic uAPI. DRM clients can use it to check
for support before feeding DRM_MODE_PAGE_FLIP_ASYNC to the kernel.

Make it clear that DRM_CAP_ASYNC_PAGE_FLIP is for legacy uAPI only.

Signed-off-by: Simon Ser 
Reviewed-by: André Almeida 
Reviewed-by: Alex Deucher 
Signed-off-by: André Almeida 
---
v4: no changes
---
 drivers/gpu/drm/drm_ioctl.c |  5 +
 include/uapi/drm/drm.h  | 10 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7c9d66ee917d..8f756b99260d 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -302,6 +302,11 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
case DRM_CAP_CRTC_IN_VBLANK_EVENT:
req->value = 1;
break;
+   case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP:
+   req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) &&
+dev->mode_config.async_page_flip &&
+		     !dev->mode_config.atomic_async_page_flip_not_supported;
+   break;
default:
return -EINVAL;
}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index a87ca2d4..54c558f81f3c 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -706,7 +706,8 @@ struct drm_gem_open {
 /**
  * DRM_CAP_ASYNC_PAGE_FLIP
  *
- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
  */
 #define DRM_CAP_ASYNC_PAGE_FLIP0x7
 /**
@@ -767,6 +768,13 @@ struct drm_gem_open {
  * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
  */
 #define DRM_CAP_SYNCOBJ_TIMELINE   0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
 
 /* DRM_IOCTL_GET_CAP ioctl argument type */
 struct drm_get_cap {
-- 
2.41.0
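
A user-space consumer would probe the new capability along these lines
(sketch using libdrm's drmGetCap(); not part of the patch):

	uint64_t cap = 0;

	if (drmGetCap(fd, DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP, &cap) == 0 && cap == 1) {
		/* DRM_MODE_PAGE_FLIP_ASYNC may be passed to atomic commits */
	} else {
		/* fall back: vblank-synchronized flips, or the legacy
		 * drmModePageFlip() path if DRM_CAP_ASYNC_PAGE_FLIP is set */
	}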



[PATCH v5 1/6] drm: allow DRM_MODE_PAGE_FLIP_ASYNC for atomic commits

2023-07-07 Thread André Almeida
From: Simon Ser 

If the driver supports it, allow user-space to supply the
DRM_MODE_PAGE_FLIP_ASYNC flag to request an async page-flip.
Set drm_crtc_state.async_flip accordingly.

Document that drivers will reject atomic commits if an async
flip isn't possible. This allows user-space to fall back to
something else. For instance, Xorg falls back to a blit.
Another option is to wait as close to the next vblank as
possible before performing the page-flip to reduce latency.

Signed-off-by: Simon Ser 
Reviewed-by: Alex Deucher 
Co-developed-by: André Almeida 
Signed-off-by: André Almeida 
---
v4: no changes
---
 drivers/gpu/drm/drm_atomic_uapi.c | 28 +---
 include/uapi/drm/drm_mode.h   |  9 +
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index d867e7f9f2cd..dfd4cf7169df 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1286,6 +1286,18 @@ static void complete_signaling(struct drm_device *dev,
kfree(fence_state);
 }
 
+static void
+set_async_flip(struct drm_atomic_state *state)
+{
+   struct drm_crtc *crtc;
+   struct drm_crtc_state *crtc_state;
+   int i;
+
+   for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+   crtc_state->async_flip = true;
+   }
+}
+
 int drm_mode_atomic_ioctl(struct drm_device *dev,
  void *data, struct drm_file *file_priv)
 {
@@ -1326,9 +1338,16 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
}
 
if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) {
-   drm_dbg_atomic(dev,
-  "commit failed: invalid flag 
DRM_MODE_PAGE_FLIP_ASYNC\n");
-   return -EINVAL;
+   if (!dev->mode_config.async_page_flip) {
+   drm_dbg_atomic(dev,
+  "commit failed: DRM_MODE_PAGE_FLIP_ASYNC 
not supported\n");
+   return -EINVAL;
+   }
+   if (dev->mode_config.atomic_async_page_flip_not_supported) {
+   drm_dbg_atomic(dev,
+  "commit failed: DRM_MODE_PAGE_FLIP_ASYNC 
not supported with atomic\n");
+   return -EINVAL;
+   }
}
 
/* can't test and expect an event at the same time. */
@@ -1426,6 +1445,9 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
if (ret)
goto out;
 
+   if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC)
+   set_async_flip(state);
+
if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
ret = drm_atomic_check_only(state);
} else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 46becedf5b2f..56342ba2c11a 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -949,6 +949,15 @@ struct hdr_output_metadata {
  * Request that the page-flip is performed as soon as possible, ie. with no
  * delay due to waiting for vblank. This may cause tearing to be visible on
  * the screen.
+ *
+ * When used with atomic uAPI, the driver will return an error if the hardware
+ * doesn't support performing an asynchronous page-flip for this update.
+ * User-space should handle this, e.g. by falling back to a regular page-flip.
+ *
+ * Note, some hardware might need to perform one last synchronous page-flip
+ * before being able to switch to asynchronous page-flips. As an exception,
+ * the driver will return success even though that first page-flip is not
+ * asynchronous.
  */
 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
-- 
2.41.0



[PATCH v5 0/6] drm: Add support for atomic async page-flip

2023-07-07 Thread André Almeida
Hi,

This work from me and Simon adds support for DRM_MODE_PAGE_FLIP_ASYNC through
the atomic API. This feature is already available via the legacy API. The use
case is to be able to present a new frame immediately (or as soon as
possible), even after missing a vblank. This might result in tearing, but
it's useful when a high framerate is desired, such as for gaming.

Differently from earlier versions, this one refuses to flip if any prop changes
for async flips. The idea is that the fast path of immediate page flips doesn't
play well with modeset changes, so only the fb_id can be changed. The exception
is for mode_id changes, that might be referring to an identical mode (which
would skip a modeset). This is done to make the async API more similar to the
normal API.

Thanks,
André

- User-space patch: https://github.com/Plagman/gamescope/pull/595
- IGT tests: https://gitlab.freedesktop.org/andrealmeid/igt-gpu-tools/-/tree/atomic_async_page_flip

Changes from v4:
 - Documentation rewrote by Pekka Paalanen

v4: https://lore.kernel.org/dri-devel/20230701020917.143394-1-andrealm...@igalia.com/

Changes from v3:
 - Add new patch to reject prop changes
 - Add a documentation clarifying the KMS atomic state set

v3: https://lore.kernel.org/dri-devel/20220929184307.258331-1-cont...@emersion.fr/

André Almeida (1):
  drm: Refuse to async flip with atomic prop changes

Pekka Paalanen (1):
  drm/doc: Define KMS atomic state set

Simon Ser (4):
  drm: allow DRM_MODE_PAGE_FLIP_ASYNC for atomic commits
  drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
  drm: introduce drm_mode_config.atomic_async_page_flip_not_supported
  amd/display: indicate support for atomic async page-flips on DC

 Documentation/gpu/drm-uapi.rst   | 41 ++
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c |  1 +
 drivers/gpu/drm/drm_atomic_helper.c  |  5 ++
 drivers/gpu/drm/drm_atomic_uapi.c| 80 ++--
 drivers/gpu/drm/drm_crtc_internal.h  |  2 +-
 drivers/gpu/drm/drm_ioctl.c  |  5 ++
 drivers/gpu/drm/drm_mode_object.c|  2 +-
 drivers/gpu/drm/i915/display/intel_display.c |  1 +
 drivers/gpu/drm/nouveau/nouveau_display.c|  1 +
 include/drm/drm_mode_config.h| 11 +++
 include/uapi/drm/drm.h   | 10 ++-
 include/uapi/drm/drm_mode.h  |  9 +++
 12 files changed, 159 insertions(+), 9 deletions(-)

-- 
2.41.0



Re: [regression][6.5] KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x555/0x670 [amdgpu] on Radeon 7900XTX

2023-07-07 Thread Mikhail Gavrilov
On Fri, Jul 7, 2023 at 6:01 AM Chen, Guchun  wrote:
>
> [Public]
>
> Hi Mike,
>
> Yes, we are aware of this problem, and we are working on that. The problem is
> caused by recent code that stores the xcp_id in the amdgpu BO for accounting
> memory usage and so on. However, not all VMs are attached to that, as in the
> amdgpu_mes_self_test case.
>

I would like to take part in testing the fix.

-- 
Best Regards,
Mike Gavrilov.


Re: [PATCH 0/2] Disable dynamic switching for SMU13 on Intel hosts

2023-07-07 Thread Alex Deucher
On Fri, Jul 7, 2023 at 3:32 PM Mario Limonciello
 wrote:
>
> When ASPM is enabled, DPM is used to perform dynamic switching.  When
> connected to an Intel PCIe controller this causes malfunctions.
>
> Identify this combination and disable dynamic switching in SMU13.
>
> This series supersedes my other series [1] and fixes it in a cleaner way.
>
> [1] https://patchwork.freedesktop.org/series/120245/
>
> Evan Quan (1):
>   drm/amd/pm: share the code around SMU13 pcie parameters update
>
> Mario Limonciello (1):
>   drm/amd/pm: conditionally disable pcie lane/speed switching for SMU13

Series is:
Reviewed-by: Alex Deucher 

We should also fix up the smu11 code in sienna_cichlid_ppt.c
similarly.  We implemented a similar, but more limited fix there
recently.

Alex


>
>  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  4 ++
>  .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 67 +++
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 33 +
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 33 +
>  4 files changed, 73 insertions(+), 64 deletions(-)
>
> --
> 2.25.1
>


Re: [PATCH libdrm v2] amdgpu: Use PRI?64 to format uint64_t

2023-07-07 Thread Geert Uytterhoeven
Hi Christian,

On Fri, Jul 7, 2023 at 2:06 PM Christian König
 wrote:
> Am 06.07.23 um 10:36 schrieb Geert Uytterhoeven:
> > On 32-bit:
> >
> >  ../tests/amdgpu/amdgpu_stress.c: In function ‘alloc_bo’:
> >  ../tests/amdgpu/amdgpu_stress.c:178:49: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=]
> >    fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n",
> >                                                  ~~^
> >                                                  %llx
> >            num_buffers++, addr, domain, size);
> >

[...]

> > Fix this by using the proper "PRI?64" format specifiers.
> >
> > Fixes: d77ccdf3ba6f5a39 ("amdgpu: add amdgpu_stress utility v2")
> > Signed-off-by: Geert Uytterhoeven 
>
> Well generally good patch, but libdrm changes are now reviewed by merge
> request and not on the mailing list any more.

I heard such a rumor, too ;-)

Unfortunately one year later, that process is still not documented in
https://gitlab.freedesktop.org/mesa/drm/-/blob/main/CONTRIBUTING.rst
which still instructs me (a casual drive-by developer) to just submit
my patches to the mailing list...

Thanks!

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
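
For reference, the pattern the patch applies is the standard C99
<inttypes.h> one; as a standalone sketch (not the actual amdgpu_stress.c
hunk):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0xdeadbeef, size = 4096;

	/* PRIx64/PRIu64 expand to the correct length modifier on both
	 * 32-bit and 64-bit targets, avoiding the -Wformat warning */
	fprintf(stdout, "Allocated BO at 0x%" PRIx64 ", size %" PRIu64 "\n",
		addr, size);
	return 0;
}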


[PATCH 1/2] drm/amd/pm: share the code around SMU13 pcie parameters update

2023-07-07 Thread Mario Limonciello
From: Evan Quan 

So that SMU13.0.0 and SMU13.0.7 do not need to have one copy each.

Signed-off-by: Evan Quan 
Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  4 +++
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 31 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 33 +--
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 33 +--
 4 files changed, 37 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 6a0ac0bbaace9..355c156d871af 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -295,5 +295,9 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu,
uint32_t *size,
uint32_t pptable_id);
 
+int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
+uint32_t pcie_gen_cap,
+uint32_t pcie_width_cap);
+
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 3856da6c3f3d2..2ef877c2cb590 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2424,3 +2424,34 @@ int smu_v13_0_mode1_reset(struct smu_context *smu)
 
return ret;
 }
+
+int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
+uint32_t pcie_gen_cap,
+uint32_t pcie_width_cap)
+{
+   struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
+   struct smu_13_0_pcie_table *pcie_table =
+		&dpm_context->dpm_tables.pcie_table;
+   uint32_t smu_pcie_arg;
+   int ret, i;
+
+   for (i = 0; i < pcie_table->num_of_link_levels; i++) {
+   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+   pcie_table->pcie_gen[i] = pcie_gen_cap;
+   if (pcie_table->pcie_lane[i] > pcie_width_cap)
+   pcie_table->pcie_lane[i] = pcie_width_cap;
+
+   smu_pcie_arg = i << 16;
+   smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
+   smu_pcie_arg |= pcie_table->pcie_lane[i];
+
+		ret = smu_cmn_send_smc_msg_with_param(smu,
+						      SMU_MSG_OverridePcieParameters,
+						      smu_pcie_arg,
+						      NULL);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 1d995f53aaaba..b9bde5fa8f8f5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1645,37 +1645,6 @@ static int smu_v13_0_0_force_clk_levels(struct smu_context *smu,
return ret;
 }
 
-static int smu_v13_0_0_update_pcie_parameters(struct smu_context *smu,
- uint32_t pcie_gen_cap,
- uint32_t pcie_width_cap)
-{
-   struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
-   struct smu_13_0_pcie_table *pcie_table =
-		&dpm_context->dpm_tables.pcie_table;
-   uint32_t smu_pcie_arg;
-   int ret, i;
-
-   for (i = 0; i < pcie_table->num_of_link_levels; i++) {
-   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
-   pcie_table->pcie_gen[i] = pcie_gen_cap;
-   if (pcie_table->pcie_lane[i] > pcie_width_cap)
-   pcie_table->pcie_lane[i] = pcie_width_cap;
-
-   smu_pcie_arg = i << 16;
-   smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
-   smu_pcie_arg |= pcie_table->pcie_lane[i];
-
-		ret = smu_cmn_send_smc_msg_with_param(smu,
-						      SMU_MSG_OverridePcieParameters,
-						      smu_pcie_arg,
-						      NULL);
-   if (ret)
-   return ret;
-   }
-
-   return 0;
-}
-
 static const struct smu_temperature_range smu13_thermal_policy[] = {
{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
@@ -2654,7 +2623,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.feature_is_enabled = smu_cmn_feature_is_enabled,
.print_clk_levels = smu_v13_0_0_print_clk_levels,
.force_clk_levels = smu_v13_0_0_force_clk_levels,
-	.update_pcie_parameters = smu_v13_0_0_update_pcie_parameters,
+	.update_pcie_parameters = smu_v13_0_update_pcie_parameters,

[PATCH 2/2] drm/amd/pm: conditionally disable pcie lane/speed switching for SMU13

2023-07-07 Thread Mario Limonciello
Intel platforms such as Sapphire Rapids and Raptor Lake don't support
dynamic pcie lane or speed switching.

This limitation seems to carry over from one generation to another.
To be safer, disable dynamic pcie lane width and speed switching when
running on an Intel platform.

Link: https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2663
Co-developed-by: Evan Quan 
Signed-off-by: Evan Quan 
Signed-off-by: Mario Limonciello 
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 42 +--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 2ef877c2cb590..cf7e729020ab6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2425,6 +2425,25 @@ int smu_v13_0_mode1_reset(struct smu_context *smu)
return ret;
 }
 
+/*
+ * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
+ * speed switching. Until we have confirmation from Intel that a specific host
+ * supports it, it's safer to keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+static bool smu_v13_0_is_pcie_dynamic_switching_supported(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+   if (c->x86_vendor == X86_VENDOR_INTEL)
+   return false;
+#endif
+   return true;
+}
+
 int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap)
@@ -2432,15 +2451,32 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_13_0_pcie_table *pcie_table =
 		&dpm_context->dpm_tables.pcie_table;
+   int num_of_levels = pcie_table->num_of_link_levels;
uint32_t smu_pcie_arg;
int ret, i;
 
-   for (i = 0; i < pcie_table->num_of_link_levels; i++) {
-   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+   if (!smu_v13_0_is_pcie_dynamic_switching_supported()) {
+   if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap)
+   pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1];
+
+   if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap)
+			pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1];
+
+   /* Force all levels to use the same settings */
+   for (i = 0; i < num_of_levels; i++) {
pcie_table->pcie_gen[i] = pcie_gen_cap;
-   if (pcie_table->pcie_lane[i] > pcie_width_cap)
pcie_table->pcie_lane[i] = pcie_width_cap;
+   }
+   } else {
+   for (i = 0; i < num_of_levels; i++) {
+   if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+   pcie_table->pcie_gen[i] = pcie_gen_cap;
+   if (pcie_table->pcie_lane[i] > pcie_width_cap)
+   pcie_table->pcie_lane[i] = pcie_width_cap;
+   }
+   }
 
+   for (i = 0; i < num_of_levels; i++) {
smu_pcie_arg = i << 16;
smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
smu_pcie_arg |= pcie_table->pcie_lane[i];
-- 
2.25.1



[PATCH 0/2] Disable dynamic switching for SMU13 on Intel hosts

2023-07-07 Thread Mario Limonciello
When ASPM is enabled, DPM is used to perform dynamic switching.  When
connected to an Intel PCIe controller, this causes malfunctions.

Identify this combination and disable dynamic switching in SMU13.

This series supersedes my other series [1] and fixes it in a cleaner way.

[1] https://patchwork.freedesktop.org/series/120245/
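
The gist of the detection is a boot-CPU vendor check; a standalone
sketch (assuming CONFIG_X86, simplified from the helper the series adds
to smu_v13_0.c):

	#include <asm/processor.h>	/* cpu_data(), X86_VENDOR_INTEL */

	/* Keep dynamic PCIe switching off whenever the boot CPU is
	 * Intel, since affected hosts cannot be told apart at runtime. */
	static bool host_supports_pcie_dynamic_switching(void)
	{
	#if IS_ENABLED(CONFIG_X86)
		if (cpu_data(0).x86_vendor == X86_VENDOR_INTEL)
			return false;
	#endif
		return true;
	}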

Evan Quan (1):
  drm/amd/pm: share the code around SMU13 pcie parameters update

Mario Limonciello (1):
  drm/amd/pm: conditionally disable pcie lane/speed switching for SMU13

 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  4 ++
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c| 67 +++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 33 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 33 +
 4 files changed, 73 insertions(+), 64 deletions(-)

-- 
2.25.1



RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Kim, Jonathan
> Sent: Friday, July 7, 2023 1:06 PM
> To: Huang, JinHuiEric ; amd-
> g...@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
>
> > -Original Message-
> > From: Huang, JinHuiEric 
> > Sent: Friday, July 7, 2023 12:44 PM
> > To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> > Cc: Joshi, Mukul 
> > Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >
> >
> > On 2023-07-07 11:56, Kim, Jonathan wrote:
> > > [Public]
> > >
> > >> -Original Message-
> > >> From: Huang, JinHuiEric 
> > >> Sent: Friday, July 7, 2023 11:46 AM
> > >> To: Kim, Jonathan ; amd-
> > g...@lists.freedesktop.org
> > >> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc
> instance
> > >>
> > >>
> > >> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > >>> [Public]
> > >>>
> >  -Original Message-
> >  From: Huang, JinHuiEric 
> >  Sent: Thursday, July 6, 2023 2:19 PM
> >  To: amd-gfx@lists.freedesktop.org
> >  Cc: Kim, Jonathan ; Huang, JinHuiEric
> >  
> >  Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> > 
> >  each xcc instance needs to get iq wait time and set
> >  grace period accordingly.
> > 
> >  Signed-off-by: Eric Huang 
> >  ---
> > .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> > .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> > .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++-
> --
> > 
> > >> -
> > .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> > drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> > 5 files changed, 32 insertions(+), 22 deletions(-)
> > 
> >  diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >  b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >  index a2bff3f01359..0f12c1989e14 100644
> >  --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >  @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> >  device_queue_manager *dqm)
> > 
> > static int initialize_cpsch(struct device_queue_manager *dqm)
> > {
> >  + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> >  +
> >  pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> > 
> >  mutex_init(>lock_hidden);
> >  @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> >  device_queue_manager *dqm)
> >  init_sdma_bitmaps(dqm);
> > 
> >  if (dqm->dev->kfd2kgd->get_iq_wait_times)
> >  - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> >  - >wait_times, 0);
> >  + for_each_inst(xcc_id, xcc_mask)
> >  + dqm->dev->kfd2kgd->get_iq_wait_times(
> >  + dqm->dev->adev,
> >  + >wait_times[xcc_id],
> >  + xcc_id);
> >  return 0;
> > }
> > 
> >  diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >  b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >  index 7dd4b177219d..62a6dc8d3032 100644
> >  --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >  @@ -262,7 +262,7 @@ struct device_queue_manager {
> >  /* used for GFX 9.4.3 only */
> >  uint32_tcurrent_logical_xcc_start;
> > 
> >  - uint32_twait_times;
> >  + uint32_twait_times[32];
> > >>> I think wait_times[16] should be sufficient.  We only get the hamming
> > >> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as
> a
> > >> uint16_t in the KGD portion anyway.  We may as well align to that.
> >  wait_queue_head_t   destroy_wait;
> > };
> >  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >  b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >  index 401096c103b2..f37ab4b6d88c 100644
> >  --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >  @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> >  packet_manager *pm, uint32_t grace_period)
> > {
> >  int retval = 0;
> >  uint32_t *buffer, size;
> >  + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> > 
> >  size = pm->pmf->set_grace_period_size;
> > 
> >  mutex_lock(>lock);
> > 
> >  if (size) {
> >  - kq_acquire_packet_buffer(pm->priv_queue,
> > 

Re: [PATCH] [v2] drm/amdgpu: avoid integer overflow warning in amdgpu_device_resize_fb_bar()

2023-07-07 Thread Alex Deucher
Applied.  thanks!

On Fri, Jul 7, 2023 at 7:47 AM Christian König  wrote:
>
> Am 07.07.23 um 13:11 schrieb Arnd Bergmann:
> > From: Arnd Bergmann 
> >
> > On 32-bit architectures comparing a resource against a value larger than
> > U32_MAX can cause a warning:
> >
> > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1344:18: error: result of 
> > comparison of constant 4294967296 with expression of type 'resource_size_t' 
> > (aka 'unsigned int') is always false 
> > [-Werror,-Wtautological-constant-out-of-range-compare]
> >  res->start > 0x1ull)
> >  ~~ ^ ~~
> >
> > As gcc does not warn about this in dead code, add an IS_ENABLED() check at
> > the start of the function. This will always return success but not actually 
> > resize
> > the BAR on 32-bit architectures without high memory, which is exactly what
> > we want here, as the driver can fall back to bank switching the VRAM
> > access.
> >
> > Fixes: 31b8adab3247e ("drm/amdgpu: require a root bus window above 4GB for 
> > BAR resize")
> > Signed-off-by: Arnd Bergmann 
>
> Reviewed-by: Christian König 
>
> > ---
> > v2: return early instead of shutting up the warning with a cast and
> > running into a failure
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
> >   1 file changed, 3 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 7f069e1731fee..fcf5f07c47751 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -1325,6 +1325,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device 
> > *adev)
> >   u16 cmd;
> >   int r;
> >
> > + if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
> > + return 0;
> > +
> >   /* Bypass for VF */
> >   if (amdgpu_sriov_vf(adev))
> >   return 0;
>
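
A side note on the pattern used here: unlike an #ifdef, IS_ENABLED()
keeps the early return as ordinary C, so the rest of the function is
still compiled and type-checked on every architecture, while the
compiler sees a constant condition and removes the branch entirely
where the option is enabled. A generic sketch (CONFIG_FOO_64BIT is a
hypothetical symbol):

	/* Always compiles; everything past the check is eliminated at
	 * compile time when CONFIG_FOO_64BIT is disabled. */
	if (!IS_ENABLED(CONFIG_FOO_64BIT))
		return 0;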


[PATCH 3/4] drm/amdkfd: enable watch points globally for gfx943

2023-07-07 Thread Eric Huang
From: Jonathan Kim 

Set watch points for all xcc instances on GFX943.

Signed-off-by: Jonathan Kim 
Reviewed-by: Felix Kuehling 
Signed-off-by: Eric Huang 
Reviewed-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 24083db44724..190b03efe5ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -446,7 +446,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
uint32_t *watch_id,
uint32_t watch_mode)
 {
-   int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+   int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+   uint32_t xcc_mask = pdd->dev->xcc_mask;
 
if (r)
return r;
@@ -460,14 +461,15 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
}
 
amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
-   pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
+   for_each_inst(xcc_id, xcc_mask)
+		pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
pdd->dev->adev,
watch_address,
watch_address_mask,
*watch_id,
watch_mode,
pdd->dev->vm_info.last_vmid_kfd,
-   0);
+   xcc_id);
amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
if (!pdd->dev->kfd->shared_resources.enable_mes)
-- 
2.34.1



[PATCH 4/4] drm/amdkfd: add multi-process debugging support for GC v9.4.3

2023-07-07 Thread Eric Huang
From: Jonathan Kim 

Similar to GC v9.4.2, GC v9.4.3 should use the 5-Dword extended
MAP_PROCESS packet to support multi-process debugging.  Update the
multi-process debug support list so that the KFD updates the runlist
on debug mode setting and that it allocates enough GTT memory during
KFD device initialization.

Signed-off-by: Jonathan Kim 
Reviewed-by: Felix Kuehling 
Signed-off-by: Eric Huang 
Reviewed-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index a289e59ceb79..a0afc6a7b6c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
 
 static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
 {
-   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
-  KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
+   return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+   KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
 }
 
 void debug_event_write_work_handler(struct work_struct *work);
-- 
2.34.1



[PATCH 1/4] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Eric Huang
From: Jonathan Kim 

Implement the same callbacks as GC v9.4.2, plus the differences
required by the GC v9.4.3 HW spec, i.e. per-xcc-instance handling.

Signed-off-by: Jonathan Kim 
Signed-off-by: Eric Huang 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  27 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 166 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   3 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   3 +-
 10 files changed, 213 insertions(+), 12 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..a06a99c5d311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -23,6 +23,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd_arcturus.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
 #include 
@@ -36,7 +37,7 @@
  * initialize the debug mode registers after it has disabled GFX off during the
  * debug session.
  */
-static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
 {
@@ -107,7 +108,7 @@ static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
return data;
 }
 
-static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
 {
@@ -125,7 +126,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
-   uint32_t debug_vmid)
+   uint32_t debug_vmid,
+					uint32_t inst)
 {
uint32_t watch_address_high;
uint32_t watch_address_low;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index ..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+   bool restore_dbg_registers,
+   uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+   uint8_t wave_launch_mode,
+   uint32_t vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 5b4b7f8b92a5..543405a28b19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -22,6 +22,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
 #include "gc/gc_9_4_3_offset.h"
 #include 

[PATCH 2/4] drm/amdkfd: restore debugger additional info for gfx v9_4_3

2023-07-07 Thread Eric Huang
From: Jonathan Kim 

The additional information that the KFD reports to the debugger was
lost when the following commit was merged:
"drm/amdkfd: convert switches to IP version checking"

Signed-off-by: Jonathan Kim 
Reviewed-by: Harish Kasiviswanathan 
Signed-off-by: Jonathan Kim 
Acked-by: Amber Lin 
Signed-off-by: Eric Huang 
Reviewed-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 --
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |  3 +++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 61fc62f3e003..1a4cdee86759 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1932,8 +1932,14 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
 
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
-		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
-   HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+   if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
+   dev->node_props.debug_prop |=
+   HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
+   HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
+   else
+   dev->node_props.debug_prop |=
+   HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+   HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
 
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
dev->node_props.debug_prop |=
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index cba2cd5ed9d1..dea32a9e5506 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -32,9 +32,12 @@
 #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
 
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9	6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10   7
 #define HSA_DBG_WATCH_ADDR_MASK_HI_BIT  \
(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
+   (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
 
 struct kfd_node_properties {
uint64_t hive_id;
-- 
2.34.1



[PATCH 0/4] Upstream debugger feature for GFX v9.4.3

2023-07-07 Thread Eric Huang
Jonathan Kim (4):
  drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3
  drm/amdkfd: restore debugger additional info for gfx v9_4_3
  drm/amdkfd: enable watch points globally for gfx943
  drm/amdkfd: add multi-process debugging support for GC v9.4.3

 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |   8 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  27 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 166 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   3 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|   6 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   9 +-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h|   5 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  10 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   3 +
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |   3 +-
 13 files changed, 231 insertions(+), 18 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h

-- 
2.34.1



Re: [PATCH v2] drm/amdgpu: Increase soft IH ring size

2023-07-07 Thread Philip Yang

  


On 2023-07-07 13:15, Felix Kuehling wrote:
> On 2023-07-07 11:49, Philip Yang wrote:
>> Retry faults are delegated to soft IH ring and then processed by
>> deferred worker. Current soft IH ring size PAGE_SIZE can store 128
>> entries, which may overflow and drop retry faults, causing HW hangs
>> because the retry fault is not recovered.
>>
>> Increase soft IH ring size to 8KB, enough to store 256 CAM entries
>> because we clear the CAM entry after handling the retry fault from soft
>> ring.
>>
>> Define macro IH_RING_SIZE and IH_SW_RING_SIZE to remove duplicate
>> constant.
>>
>> Show warning message if soft IH ring overflows because this should not
>> happen.
>
> It would indicate a problem with the CAM or it could happen on older
> GPUs that don't have a CAM. See below.
>
>> @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>>  	if (wptr != READ_ONCE(ih->rptr)) {
>>  		wmb();
>>  		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
>> +	} else {
>> +		dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
>> +			 wptr, ih->rptr);
>
> If this happens, it's probably going to flood the log. It would be a
> good idea to apply a rate-limit, or use dev_warn_once. With that fixed,
> the patch is
>
> Reviewed-by: Felix Kuehling 

Will use dev_warn_once, and only warn if adev->irq.retry_cam_enabled,
because soft IH ring overflow is fine for older GPUs without CAM.

Thanks,
Philip

Re: [PATCH v2] drm/amdgpu: Increase soft IH ring size

2023-07-07 Thread Felix Kuehling

On 2023-07-07 11:49, Philip Yang wrote:

Retry faults are delegated to soft IH ring and then processed by
deferred worker. Current soft IH ring size PAGE_SIZE can store 128
entries, which may overflow and drop retry faults, causing HW hangs
because the retry fault is not recovered.

Increase soft IH ring size to 8KB, enough to store 256 CAM entries
because we clear the CAM entry after handling the retry fault from soft
ring.

Define macro IH_RING_SIZE and IH_SW_RING_SIZE to remove duplicate
constant.

Show warning message if soft IH ring overflows because this should not
happen.


It would indicate a problem with the CAM or it could happen on older 
GPUs that don't have a CAM. See below.





Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 7 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c| 4 ++--
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 4 ++--
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 4 ++--
  drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 4 ++--
  7 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index fceb3b384955..51a0dbd2358a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct 
amdgpu_ih_ring *ih)
  /**
   * amdgpu_ih_ring_write - write IV to the ring buffer
   *
+ * @adev: amdgpu_device pointer
   * @ih: ih ring to write to
   * @iv: the iv to write
   * @num_dw: size of the iv in dw
@@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct 
amdgpu_ih_ring *ih)
   * Writes an IV to the ring buffer using the CPU and increment the wptr.
   * Used for testing and delegating IVs to a software ring.
   */
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring 
*ih,
+ const uint32_t *iv, unsigned int num_dw)
  {
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
@@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const 
uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+   } else {
+   dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+wptr, ih->rptr);


If this happens, it's probably going to flood the log. It would be a 
good idea to apply a rate-limit, or use dev_warn_once. With that fixed, 
the patch is


Reviewed-by: Felix Kuehling 



}
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

index dd1c2eded6b9..6c6184f0dbc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -27,6 +27,9 @@
  /* Maximum number of IVs processed at once */
  #define AMDGPU_IH_MAX_NUM_IVS 32
  
+#define IH_RING_SIZE	(256 * 1024)

+#define IH_SW_RING_SIZE	(8 * 1024)	/* enough for 256 CAM entries */
+
  struct amdgpu_device;
  struct amdgpu_iv_entry;
  
@@ -97,8 +100,8 @@ struct amdgpu_ih_funcs {

  int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
  void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring 
*ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring 
*ih,
+ const uint32_t *iv, unsigned int num_dw);
  int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih);
  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5273decc5753..fa6d0adcec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
 struct amdgpu_iv_entry *entry,
 unsigned int num_dw)
  {
-   amdgpu_ih_ring_write(>irq.ih_soft, entry->iv_entry, num_dw);
+   amdgpu_ih_ring_write(adev, >irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(>irq.ih_soft_work);
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c

index b02e1cef78a7..980b24120080 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -535,7 +535,7 @@ static int ih_v6_0_sw_init(void *handle)
 * 

RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Friday, July 7, 2023 12:44 PM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Cc: Joshi, Mukul 
> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
> On 2023-07-07 11:56, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Huang, JinHuiEric 
> >> Sent: Friday, July 7, 2023 11:46 AM
> >> To: Kim, Jonathan ; amd-
> g...@lists.freedesktop.org
> >> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>
> >>
> >> On 2023-07-07 10:59, Kim, Jonathan wrote:
> >>> [Public]
> >>>
>  -Original Message-
>  From: Huang, JinHuiEric 
>  Sent: Thursday, July 6, 2023 2:19 PM
>  To: amd-gfx@lists.freedesktop.org
>  Cc: Kim, Jonathan ; Huang, JinHuiEric
>  
>  Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> 
>  each xcc instance needs to get iq wait time and set
>  grace period accordingly.
> 
>  Signed-off-by: Eric Huang 
>  ---
> .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++---
> 
> >> -
> .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> 5 files changed, 32 insertions(+), 22 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>  b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>  index a2bff3f01359..0f12c1989e14 100644
>  --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>  @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
>  device_queue_manager *dqm)
> 
> static int initialize_cpsch(struct device_queue_manager *dqm)
> {
>  + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
>  +
>  pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> 
>  mutex_init(>lock_hidden);
>  @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
>  device_queue_manager *dqm)
>  init_sdma_bitmaps(dqm);
> 
>  if (dqm->dev->kfd2kgd->get_iq_wait_times)
>  - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
>  - >wait_times, 0);
>  + for_each_inst(xcc_id, xcc_mask)
>  + dqm->dev->kfd2kgd->get_iq_wait_times(
>  + dqm->dev->adev,
>  + >wait_times[xcc_id],
>  + xcc_id);
>  return 0;
> }
> 
>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>  b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>  index 7dd4b177219d..62a6dc8d3032 100644
>  --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>  @@ -262,7 +262,7 @@ struct device_queue_manager {
>  /* used for GFX 9.4.3 only */
>  uint32_tcurrent_logical_xcc_start;
> 
>  - uint32_twait_times;
>  + uint32_twait_times[32];
> >>> I think wait_times[16] should be sufficient.  We only get the hamming
> >> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
> >> uint16_t in the KGD portion anyway.  We may as well align to that.
>  wait_queue_head_t   destroy_wait;
> };
>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
>  b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
>  index 401096c103b2..f37ab4b6d88c 100644
>  --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
>  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
>  @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
>  packet_manager *pm, uint32_t grace_period)
> {
>  int retval = 0;
>  uint32_t *buffer, size;
>  + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> 
>  size = pm->pmf->set_grace_period_size;
> 
>  mutex_lock(>lock);
> 
>  if (size) {
>  - kq_acquire_packet_buffer(pm->priv_queue,
>  - size / sizeof(uint32_t),
>  - (unsigned int **));
>  -
>  - if (!buffer) {
>  - pr_err("Failed to allocate buffer on kernel 
>  queue\n");
>  - retval = -ENOMEM;
>  - goto out;
>  - }
>  + for_each_inst(xcc_id, xcc_mask) {
>  + kq_acquire_packet_buffer(pm->priv_queue,
>  + 

Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Eric Huang



On 2023-07-07 11:56, Kim, Jonathan wrote:

[Public]


-Original Message-
From: Huang, JinHuiEric 
Sent: Friday, July 7, 2023 11:46 AM
To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance


On 2023-07-07 10:59, Kim, Jonathan wrote:

[Public]


-Original Message-
From: Huang, JinHuiEric 
Sent: Thursday, July 6, 2023 2:19 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kim, Jonathan ; Huang, JinHuiEric

Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

each xcc instance needs to get iq wait time and set
grace period accordingly.

Signed-off-by: Eric Huang 
---
   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++---

-

   .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
   5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a2bff3f01359..0f12c1989e14 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
device_queue_manager *dqm)

   static int initialize_cpsch(struct device_queue_manager *dqm)
   {
+ uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
+
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

mutex_init(>lock_hidden);
@@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
device_queue_manager *dqm)
init_sdma_bitmaps(dqm);

if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- >wait_times, 0);
+ for_each_inst(xcc_id, xcc_mask)
+ dqm->dev->kfd2kgd->get_iq_wait_times(
+ dqm->dev->adev,
+ >wait_times[xcc_id],
+ xcc_id);
return 0;
   }

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7dd4b177219d..62a6dc8d3032 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -262,7 +262,7 @@ struct device_queue_manager {
/* used for GFX 9.4.3 only */
uint32_tcurrent_logical_xcc_start;

- uint32_twait_times;
+ uint32_twait_times[32];

I think wait_times[16] should be sufficient.  We only get the hamming

weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
uint16_t in the KGD portion anyway.  We may as well align to that.

wait_queue_head_t   destroy_wait;
   };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 401096c103b2..f37ab4b6d88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -374,27 +374,31 @@ int pm_update_grace_period(struct
packet_manager *pm, uint32_t grace_period)
   {
int retval = 0;
uint32_t *buffer, size;
+ uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;

size = pm->pmf->set_grace_period_size;

mutex_lock(>lock);

if (size) {
- kq_acquire_packet_buffer(pm->priv_queue,
- size / sizeof(uint32_t),
- (unsigned int **));
-
- if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
- retval = -ENOMEM;
- goto out;
- }
+ for_each_inst(xcc_id, xcc_mask) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **));

- retval = pm->pmf->set_grace_period(pm, buffer,
grace_period);
- if (!retval)
- kq_submit_packet(pm->priv_queue);
- else
- kq_rollback_packet(pm->priv_queue);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel
queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->set_grace_period(pm, buffer,
+ grace_period, xcc_id);
+ if (!retval)
+ kq_submit_packet(pm->priv_queue);
+ else
+ kq_rollback_packet(pm->priv_queue);

In the event of partial success do we need to roll back 

RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Friday, July 7, 2023 11:46 AM
> To: Kim, Jonathan ; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > [Public]
> >
> >> -Original Message-
> >> From: Huang, JinHuiEric 
> >> Sent: Thursday, July 6, 2023 2:19 PM
> >> To: amd-gfx@lists.freedesktop.org
> >> Cc: Kim, Jonathan ; Huang, JinHuiEric
> >> 
> >> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>
> >> each xcc instance needs to get iq wait time and set
> >> grace period accordingly.
> >>
> >> Signed-off-by: Eric Huang 
> >> ---
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> >>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++---
> -
> >>   .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
> >>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
> >>   5 files changed, 32 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> index a2bff3f01359..0f12c1989e14 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> >> device_queue_manager *dqm)
> >>
> >>   static int initialize_cpsch(struct device_queue_manager *dqm)
> >>   {
> >> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> >> +
> >>pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> >>
> >>mutex_init(>lock_hidden);
> >> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> >> device_queue_manager *dqm)
> >>init_sdma_bitmaps(dqm);
> >>
> >>if (dqm->dev->kfd2kgd->get_iq_wait_times)
> >> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> >> - >wait_times, 0);
> >> + for_each_inst(xcc_id, xcc_mask)
> >> + dqm->dev->kfd2kgd->get_iq_wait_times(
> >> + dqm->dev->adev,
> >> + >wait_times[xcc_id],
> >> + xcc_id);
> >>return 0;
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> index 7dd4b177219d..62a6dc8d3032 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> @@ -262,7 +262,7 @@ struct device_queue_manager {
> >>/* used for GFX 9.4.3 only */
> >>uint32_tcurrent_logical_xcc_start;
> >>
> >> - uint32_twait_times;
> >> + uint32_twait_times[32];
> > I think wait_times[16] should be sufficient.  We only get the hamming
> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
> uint16_t in the KGD portion anyway.  We may as well align to that.
> >
> >>wait_queue_head_t   destroy_wait;
> >>   };
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> index 401096c103b2..f37ab4b6d88c 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> >> packet_manager *pm, uint32_t grace_period)
> >>   {
> >>int retval = 0;
> >>uint32_t *buffer, size;
> >> + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> >>
> >>size = pm->pmf->set_grace_period_size;
> >>
> >>mutex_lock(>lock);
> >>
> >>if (size) {
> >> - kq_acquire_packet_buffer(pm->priv_queue,
> >> - size / sizeof(uint32_t),
> >> - (unsigned int **));
> >> -
> >> - if (!buffer) {
> >> - pr_err("Failed to allocate buffer on kernel 
> >> queue\n");
> >> - retval = -ENOMEM;
> >> - goto out;
> >> - }
> >> + for_each_inst(xcc_id, xcc_mask) {
> >> + kq_acquire_packet_buffer(pm->priv_queue,
> >> + size / sizeof(uint32_t),
> >> + (unsigned int **));
> >>
> >> - retval = pm->pmf->set_grace_period(pm, buffer,
> >> grace_period);
> >> - if (!retval)
> >> - kq_submit_packet(pm->priv_queue);
> >> - else
> >> - kq_rollback_packet(pm->priv_queue);
> >> + if (!buffer) {
> >> + pr_err("Failed to allocate buffer on kernel
> >> queue\n");
> >> + 

[PATCH v2] drm/amdgpu: Increase soft IH ring size

2023-07-07 Thread Philip Yang
Retry faults are delegated to soft IH ring and then processed by
deferred worker. Current soft IH ring size PAGE_SIZE can store 128
entries, which may overflow and drop retry faults, causing HW hangs
because the retry fault is not recovered.

Increase soft IH ring size to 8KB, enough to store 256 CAM entries
because we clear the CAM entry after handling the retry fault from soft
ring.
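
For reference, the sizing arithmetic (assuming 4 KiB pages; the
32-byte IV entry size follows from the 128-entries-per-PAGE_SIZE
figure above):

	PAGE_SIZE / 32       = 4096 / 32 = 128 entries (old)
	IH_SW_RING_SIZE / 32 = 8192 / 32 = 256 entries (new)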

Define macro IH_RING_SIZE and IH_SW_RING_SIZE to remove duplicate
constant.

Show warning message if soft IH ring overflows because this should not
happen.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/ih_v6_0.c| 4 ++--
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 4 ++--
 7 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index fceb3b384955..51a0dbd2358a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 /**
  * amdgpu_ih_ring_write - write IV to the ring buffer
  *
+ * @adev: amdgpu_device pointer
  * @ih: ih ring to write to
  * @iv: the iv to write
  * @num_dw: size of the iv in dw
@@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
  * Writes an IV to the ring buffer using the CPU and increment the wptr.
  * Used for testing and delegating IVs to a software ring.
  */
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
 {
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
@@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+   } else {
+   dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+wptr, ih->rptr);
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index dd1c2eded6b9..6c6184f0dbc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -27,6 +27,9 @@
 /* Maximum number of IVs processed at once */
 #define AMDGPU_IH_MAX_NUM_IVS  32
 
+#define IH_RING_SIZE   (256 * 1024)
+#define IH_SW_RING_SIZE	(8 * 1024)	/* enough for 256 CAM entries */
+
 struct amdgpu_device;
 struct amdgpu_iv_entry;
 
@@ -97,8 +100,8 @@ struct amdgpu_ih_funcs {
 int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
 int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih);
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5273decc5753..fa6d0adcec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
 struct amdgpu_iv_entry *entry,
 unsigned int num_dw)
 {
-	amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
+	amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(>irq.ih_soft_work);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index b02e1cef78a7..980b24120080 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -535,7 +535,7 @@ static int ih_v6_0_sw_init(void *handle)
 * use bus address for ih ring by psp bl */
use_bus_addr =
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
-	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
 
@@ -548,7 +548,7 @@ static int 

Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Eric Huang



On 2023-07-07 10:59, Kim, Jonathan wrote:

[Public]


-Original Message-
From: Huang, JinHuiEric 
Sent: Thursday, July 6, 2023 2:19 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kim, Jonathan ; Huang, JinHuiEric

Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

each xcc instance needs to get iq wait time and set
grace period accordingly.

Signed-off-by: Eric Huang 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++
  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
  5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a2bff3f01359..0f12c1989e14 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
device_queue_manager *dqm)

  static int initialize_cpsch(struct device_queue_manager *dqm)
  {
+ uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
+
   pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

   mutex_init(>lock_hidden);
@@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
device_queue_manager *dqm)
   init_sdma_bitmaps(dqm);

   if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- >wait_times, 0);
+ for_each_inst(xcc_id, xcc_mask)
+ dqm->dev->kfd2kgd->get_iq_wait_times(
+ dqm->dev->adev,
+ >wait_times[xcc_id],
+ xcc_id);
   return 0;
  }

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7dd4b177219d..62a6dc8d3032 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -262,7 +262,7 @@ struct device_queue_manager {
   /* used for GFX 9.4.3 only */
   uint32_tcurrent_logical_xcc_start;

- uint32_twait_times;
+ uint32_twait_times[32];

I think wait_times[16] should be sufficient.  We only get the hamming weight of 
16 bits for NUM_XCC and I believe the xcc_mask is declared as a uint16_t in the 
KGD portion anyway.  We may as well align to that.


   wait_queue_head_t   destroy_wait;
  };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 401096c103b2..f37ab4b6d88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -374,27 +374,31 @@ int pm_update_grace_period(struct
packet_manager *pm, uint32_t grace_period)
  {
   int retval = 0;
   uint32_t *buffer, size;
+ uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;

   size = pm->pmf->set_grace_period_size;

   mutex_lock(>lock);

   if (size) {
- kq_acquire_packet_buffer(pm->priv_queue,
- size / sizeof(uint32_t),
- (unsigned int **));
-
- if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
- retval = -ENOMEM;
- goto out;
- }
+ for_each_inst(xcc_id, xcc_mask) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **));

- retval = pm->pmf->set_grace_period(pm, buffer,
grace_period);
- if (!retval)
- kq_submit_packet(pm->priv_queue);
- else
- kq_rollback_packet(pm->priv_queue);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel
queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->set_grace_period(pm, buffer,
+ grace_period, xcc_id);
+ if (!retval)
+ kq_submit_packet(pm->priv_queue);
+ else
+ kq_rollback_packet(pm->priv_queue);

In the event of partial success do we need to roll back (i.e. resubmit default 
grace period) on failure?
The function pm_set_grace_period_v9 always returns 0, and it is not a
complicated operation, so it should always succeed. Partial success is
not a case we need to care about at this moment.


Regards,
Eric

I believe the default grace 

[PATCH v5] drm/amdgpu:update kernel vcn ring test

2023-07-07 Thread Saleemkhan Jamadar
add session context buffer to decoder ring test.

v5 - clear the session ct buffer (Christian)
v4 - data type, explain change of ib size change (Christian)
v3 - indent and  v2 changes correction. (Christian)
v2 - put the buffer at the end of the IB (Christian)
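
For reference, the layout the 34-page allocation implies (assuming
4 KiB GPU pages; the 8 KiB offset matches the session_ctx_buf_gaddr
computation in the patch):

	offset 0    :   8 KiB IB message area         ( 2 pages)
	offset 8192 : 128 KiB session context buffer  (32 pages)
	total       : 136 KiB = 34 * AMDGPU_GPU_PAGE_SIZE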

Signed-off-by: Saleemkhan Jamadar 
Acked-by: Leo Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 13 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  5 -
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2d94f1b63bd6..76e9a2418286 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -573,13 +573,15 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
int r, i;
 
memset(ib, 0, sizeof(*ib));
-   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   /* 34 pages : 128KiB  session context buffer size and 8KiB ib msg */
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
AMDGPU_IB_POOL_DIRECT,
ib);
if (r)
return r;
 
msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+   memset(msg, 0, (AMDGPU_GPU_PAGE_SIZE * 34));
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0038);
msg[2] = cpu_to_le32(0x0001);
@@ -608,13 +610,15 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
int r, i;
 
memset(ib, 0, sizeof(*ib));
-   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   /* 34 pages : 128KiB  session context buffer size and 8KiB ib msg */
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
AMDGPU_IB_POOL_DIRECT,
ib);
if (r)
return r;
 
msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+   memset(msg, 0, (AMDGPU_GPU_PAGE_SIZE * 34));
msg[0] = cpu_to_le32(0x0028);
msg[1] = cpu_to_le32(0x0018);
msg[2] = cpu_to_le32(0x);
@@ -700,6 +704,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+	uint64_t session_ctx_buf_gaddr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr + 8192);
bool sq = amdgpu_vcn_using_unified_queue(ring);
uint32_t *ib_checksum;
uint32_t ib_pack_in_dw;
@@ -730,6 +735,10 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
 
+	decode_buffer->valid_buf_flag |=
+			cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER);
+	decode_buffer->session_context_buffer_address_hi = upper_32_bits(session_ctx_buf_gaddr);
+	decode_buffer->session_context_buffer_address_lo = lower_32_bits(session_ctx_buf_gaddr);
 	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index f1397ef66fd7..2df43cd76c10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -166,6 +166,7 @@
 
 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER   0x0001
 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x0001
+#define AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER 0x0010
 
 #define VCN_CODEC_DISABLE_MASK_AV1  (1 << 0)
 #define VCN_CODEC_DISABLE_MASK_VP9  (1 << 1)
@@ -357,7 +358,9 @@ struct amdgpu_vcn_decode_buffer {
uint32_t valid_buf_flag;
uint32_t msg_buffer_address_hi;
uint32_t msg_buffer_address_lo;
-   uint32_t pad[30];
+   uint32_t session_context_buffer_address_hi;
+   uint32_t session_context_buffer_address_lo;
+   uint32_t pad[28];
 };
 
 #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
-- 
2.25.1



Re: [PATCH] drm/amdgpu: Increase IH soft ring size

2023-07-07 Thread Felix Kuehling



Am 2023-07-07 um 10:14 schrieb Philip Yang:

Retry faults are delegated to IH soft ring and then processed by
deferred worker. Current IH soft ring size PAGE_SIZE can store 128
entries, which may overflow and drop retry faults, causing HW hangs
because the retry fault is not recovered.

Increase IH soft ring size to the same size as IH ring, define macro
IH_RING_SIZE to remove duplicate constant.


As discussed offline, dropping retry fault interrupts is only a problem 
when the CAM is enabled. You only need as many entries in the soft IH 
ring as there are entries in the CAM.


Regards,
  Felix




Show warning message if IH soft ring overflows because this should not
happen any more.

Signed-off-by: Philip Yang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c| 5 +++--
  drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 5 +++--
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 5 +++--
  drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 5 +++--
  7 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index fceb3b384955..51a0dbd2358a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct 
amdgpu_ih_ring *ih)
  /**
   * amdgpu_ih_ring_write - write IV to the ring buffer
   *
+ * @adev: amdgpu_device pointer
   * @ih: ih ring to write to
   * @iv: the iv to write
   * @num_dw: size of the iv in dw
@@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
  * Writes an IV to the ring buffer using the CPU and increment the wptr.
  * Used for testing and delegating IVs to a software ring.
  */
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
  {
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
@@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+   } else {
+   dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+wptr, ih->rptr);
}
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

index dd1c2eded6b9..a8cf67f1f011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {
  int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
  int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih);
  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5273decc5753..fa6d0adcec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
 struct amdgpu_iv_entry *entry,
 unsigned int num_dw)
  {
-	amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
+	amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(>irq.ih_soft_work);
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index b02e1cef78a7..21d2e57cffe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -32,6 +32,7 @@
  #include "soc15_common.h"
  #include "ih_v6_0.h"
  
+#define IH_RING_SIZE	(256 * 1024)

  #define MAX_REARM_RETRY 10
  
  static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);

@@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle)
 * use bus address for ih ring by psp bl */
use_bus_addr =
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
-	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);

RE: [PATCH 6/6] drm/amdkfd: add multi-process debugging support for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 6/6] drm/amdkfd: add multi-process debugging support for
> GC v9.4.3
>
> From: Jonathan Kim 
>
> Similar to GC v9.4.2, GC v9.4.3 should use the 5-Dword extended
> MAP_PROCESS packet to support multi-process debugging.  Update the
> multi-process debug support list so that the KFD updates the runlist
> on debug mode setting and that it allocates enough GTT memory during
> KFD device initialization.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 

This patch is Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index a289e59ceb79..a0afc6a7b6c4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct
> kfd_process *p,
>
>  static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
>  {
> - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> -KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
> + return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
> + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
>  }
>
>  void debug_event_write_work_handler(struct work_struct *work);
> --
> 2.34.1



RE: [PATCH 3/6] drm/amdkfd: enable watch points globally for gfx943

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Kuehling, Felix ;
> Huang, JinHuiEric 
> Subject: [PATCH 3/6] drm/amdkfd: enable watch points globally for gfx943
>
> From: Jonathan Kim 
>
> Set watch points for all xcc instances on GFX943.
>
> Signed-off-by: Jonathan Kim 
> Reviewed-by: Felix Kuehling 
> Signed-off-by: Eric Huang 

This patch is Reviewed-by: Jonathan Kim 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 24083db44724..190b03efe5ff 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -446,7 +446,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct
> kfd_process_device *pdd,
>   uint32_t *watch_id,
>   uint32_t watch_mode)
>  {
> - int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> + int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> + uint32_t xcc_mask = pdd->dev->xcc_mask;
>
>   if (r)
>   return r;
> @@ -460,14 +461,15 @@ int kfd_dbg_trap_set_dev_address_watch(struct
> kfd_process_device *pdd,
>   }
>
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> - pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
> + for_each_inst(xcc_id, xcc_mask)
> + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
>   pdd->dev->adev,
>   watch_address,
>   watch_address_mask,
>   *watch_id,
>   watch_mode,
>   pdd->dev->vm_info.last_vmid_kfd,
> - 0);
> + xcc_id);
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
>   if (!pdd->dev->kfd->shared_resources.enable_mes)
> --
> 2.34.1



Re: [PATCH v4] drm/amdgpu:update kernel vcn ring test

2023-07-07 Thread Saleemkhan Jamadar

Hi Christian,

Response inline [Saleem].

Regards,

Saleem

On 07/07/23 12:35, Christian König wrote:



Am 06.07.23 um 16:47 schrieb Saleemkhan Jamadar:

add session context buffer to decoder ring test.

v4 - data type, explain change of ib size change (Christian)
v3 - indent and  v2 changes correction. (Christian)
v2 - put the buffer at the end of the IB (Christian)

Signed-off-by: Saleemkhan Jamadar 
Acked-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 11 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  5 -
  2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2d94f1b63bd6..9bdfe665f603 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -573,7 +573,8 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
  int r, i;
    memset(ib, 0, sizeof(*ib));
-    r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+    /* 34 pages : 128KiB session context buffer size and 8KiB ib msg */
+    r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
  AMDGPU_IB_POOL_DIRECT,
  ib);
  if (r)
@@ -608,7 +609,8 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han

  int r, i;
    memset(ib, 0, sizeof(*ib));
-    r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+    /* 34 pages : 128KB  session context buffer size and 8KB ib msg */
+    r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
  AMDGPU_IB_POOL_DIRECT,
  ib);


One more question here: Does the create and destroy message need to 
point to the same session context buffer or is it ok that we use a 
separate dummy for both?
[Saleem] Both cases work OK. The version 1 change used the same buffer for
both cmds.



Either way we should probably clear the context buffer with zeros.
[Saleem] Noted, will make change.
Apart from that this now looks good to me,
Christian.


  if (r)
@@ -700,6 +702,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
  struct amdgpu_job *job;
  struct amdgpu_ib *ib;
  uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+    uint64_t session_ctx_buf_gaddr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr + 8192);

  bool sq = amdgpu_vcn_using_unified_queue(ring);
  uint32_t *ib_checksum;
  uint32_t ib_pack_in_dw;
@@ -730,6 +733,10 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
  ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
  memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
+    decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER);
+    decode_buffer->session_context_buffer_address_hi = upper_32_bits(session_ctx_buf_gaddr);
+    decode_buffer->session_context_buffer_address_lo = lower_32_bits(session_ctx_buf_gaddr);
  decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
  decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
  decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index f1397ef66fd7..2df43cd76c10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -166,6 +166,7 @@
    #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER    0x0001
  #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER    0x0001
+#define AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER 0x0010
    #define VCN_CODEC_DISABLE_MASK_AV1  (1 << 0)
  #define VCN_CODEC_DISABLE_MASK_VP9  (1 << 1)
@@ -357,7 +358,9 @@ struct amdgpu_vcn_decode_buffer {
  uint32_t valid_buf_flag;
  uint32_t msg_buffer_address_hi;
  uint32_t msg_buffer_address_lo;
-    uint32_t pad[30];
+    uint32_t session_context_buffer_address_hi;
+    uint32_t session_context_buffer_address_lo;
+    uint32_t pad[28];
  };
    #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80




RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

2023-07-07 Thread Kim, Jonathan
[Public]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
> Each xcc instance needs to get the iq wait time and set
> the grace period accordingly.
>
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 --
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
>  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|  9 +++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  2 +-
>  5 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a2bff3f01359..0f12c1989e14 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> device_queue_manager *dqm)
>
>  static int initialize_cpsch(struct device_queue_manager *dqm)
>  {
> + uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> +
>   pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
>
>   mutex_init(>lock_hidden);
> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> device_queue_manager *dqm)
>   init_sdma_bitmaps(dqm);
>
>   if (dqm->dev->kfd2kgd->get_iq_wait_times)
> - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> - &dqm->wait_times, 0);
> + for_each_inst(xcc_id, xcc_mask)
> + dqm->dev->kfd2kgd->get_iq_wait_times(
> + dqm->dev->adev,
> + &dqm->wait_times[xcc_id],
> + xcc_id);
>   return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 7dd4b177219d..62a6dc8d3032 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -262,7 +262,7 @@ struct device_queue_manager {
>   /* used for GFX 9.4.3 only */
>   uint32_tcurrent_logical_xcc_start;
>
> - uint32_twait_times;
> + uint32_twait_times[32];

I think wait_times[16] should be sufficient.  We only take the Hamming weight
of 16 bits for NUM_XCC, and I believe the xcc_mask is declared as a uint16_t in
the KGD portion anyway.  We may as well align to that.
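
A minimal sketch of that bound, assuming the 16-bit xcc_mask from the KGD
interface (MAX_XCC and dqm_sketch are illustrative names, not the driver's):

/* One wait-time slot per possible XCC instance; a 16-bit xcc_mask
 * means for_each_inst() can visit at most 16 instances.
 */
#define MAX_XCC 16

struct dqm_sketch {
	uint32_t wait_times[MAX_XCC];
};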

>
>   wait_queue_head_t   destroy_wait;
>  };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> index 401096c103b2..f37ab4b6d88c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> packet_manager *pm, uint32_t grace_period)
>  {
>   int retval = 0;
>   uint32_t *buffer, size;
> + uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
>
>   size = pm->pmf->set_grace_period_size;
>
> mutex_lock(&pm->lock);
>
>   if (size) {
> - kq_acquire_packet_buffer(pm->priv_queue,
> - size / sizeof(uint32_t),
> - (unsigned int **)&buffer);
> -
> - if (!buffer) {
> - pr_err("Failed to allocate buffer on kernel queue\n");
> - retval = -ENOMEM;
> - goto out;
> - }
> + for_each_inst(xcc_id, xcc_mask) {
> + kq_acquire_packet_buffer(pm->priv_queue,
> + size / sizeof(uint32_t),
> + (unsigned int **)&buffer);
>
> - retval = pm->pmf->set_grace_period(pm, buffer,
> grace_period);
> - if (!retval)
> - kq_submit_packet(pm->priv_queue);
> - else
> - kq_rollback_packet(pm->priv_queue);
> + if (!buffer) {
> + pr_err("Failed to allocate buffer on kernel
> queue\n");
> + retval = -ENOMEM;
> + goto out;
> + }
> +
> + retval = pm->pmf->set_grace_period(pm, buffer,
> + grace_period, xcc_id);
> + if (!retval)
> + kq_submit_packet(pm->priv_queue);
> + else
> + kq_rollback_packet(pm->priv_queue);

In the event of partial success, do we need to roll back (i.e. resubmit the
default grace period) on failure?
I believe the default grace period is put in place for better CWSR performance
in normal mode, so 

RE: [PATCH] drm/amdgpu: Increase IH soft ring size

2023-07-07 Thread Joshi, Mukul
[AMD Official Use Only - General]

> -Original Message-
> From: Yang, Philip 
> Sent: Friday, July 7, 2023 10:15 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Joshi, Mukul
> ; Yang, Philip 
> Subject: [PATCH] drm/amdgpu: Increase IH soft ring size
>
> Retry faults are delegated to the IH soft ring and then processed by a
> deferred worker. The current IH soft ring size of PAGE_SIZE can store 128
> entries, which may overflow and drop retry faults, causing the HW to get
> stuck because the dropped retry fault is never recovered.
>
> Increase the IH soft ring size to the same size as the IH ring, and define
> the macro IH_RING_SIZE to remove the duplicated constant.
>
> Show a warning message if the IH soft ring overflows, because this should
> not happen any more.
>
> Signed-off-by: Philip Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c| 5 +++--
>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 5 +++--
>  7 files changed, 21 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index fceb3b384955..51a0dbd2358a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>  /**
>   * amdgpu_ih_ring_write - write IV to the ring buffer
>   *
> + * @adev: amdgpu_device pointer
>   * @ih: ih ring to write to
>   * @iv: the iv to write
>   * @num_dw: size of the iv in dw
> @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>   * Writes an IV to the ring buffer using the CPU and increment the wptr.
>   * Used for testing and delegating IVs to a software ring.
>   */
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> -   unsigned int num_dw)
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> +   const uint32_t *iv, unsigned int num_dw)
>  {
>   uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
>   unsigned int i;
> @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   if (wptr != READ_ONCE(ih->rptr)) {
>   wmb();
>   WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
> + } else {
> + dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
> +  wptr, ih->rptr);
>   }
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index dd1c2eded6b9..a8cf67f1f011 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {
>  int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>  			unsigned ring_size, bool use_bus_addr);
>  void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> -   unsigned int num_dw);
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> +   const uint32_t *iv, unsigned int num_dw);
>  int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
>   struct amdgpu_ih_ring *ih);
>  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 5273decc5753..fa6d0adcec20 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device
> *adev,
>struct amdgpu_iv_entry *entry,
>unsigned int num_dw)
>  {
> - amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
> + amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
>   schedule_work(>irq.ih_soft_work);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index b02e1cef78a7..21d2e57cffe7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -32,6 +32,7 @@
>  #include "soc15_common.h"
>  #include "ih_v6_0.h"
>
> +#define IH_RING_SIZE (256 * 1024)

I would recommend moving IH_RING_SIZE to amdgpu_ih.h instead of duplicating in 
the .c files.
The rest looks good to me.
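
A minimal sketch of that suggestion (placement illustrative):

/* drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h -- shared by all IH blocks */
#define IH_RING_SIZE	(256 * 1024)

/* navi10_ih.c, vega10_ih.c, vega20_ih.c and ih_v6_0.c can then drop
 * their local copies and pass IH_RING_SIZE to amdgpu_ih_ring_init().
 */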

Regards,
Mukul

>  #define MAX_REARM_RETRY 10
>
>  static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
> @@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void 

RE: [PATCH 5/6] drm/amdkfd: always keep trap enabled for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[Public]

If we implement this in the GC 9.4.3 KGD disable call in patch 1 (see comments 
for that one), then it will look less awkward and we can drop this.

Thanks,

Jon

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Huang, JinHuiEric
> 
> Subject: [PATCH 5/6] drm/amdkfd: always keep trap enabled for GC v9.4.3
>
> To set TTMP setup on by default.
>
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c   | 3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++---
>  3 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index cf1db0ab3471..47c5d16677d6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2842,7 +2842,7 @@ static int runtime_disable(struct kfd_process *p)
>   pdd->spi_dbg_override =
> pdd->dev->kfd2kgd->disable_debug_trap(
>   pdd->dev->adev,
> - false,
> + KFD_GC_VERSION(pdd->dev) == IP_VERSION(9, 4, 3),
>   pdd->dev->vm_info.last_vmid_kfd);
>
>   if (!pdd->dev->kfd->shared_resources.enable_mes)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 190b03efe5ff..4cb9b3b18065 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -591,7 +591,8 @@ void kfd_dbg_trap_deactivate(struct kfd_process
> *target, bool unwind, int unwind
>   pdd->spi_dbg_override =
>   pdd->dev->kfd2kgd->disable_debug_trap(
>   pdd->dev->adev,
> - target->runtime_info.ttmp_setup,
> + KFD_GC_VERSION(pdd->dev) == IP_VERSION(9, 4, 3) ?
> + true : target->runtime_info.ttmp_setup,
>   pdd->dev->vm_info.last_vmid_kfd);
>   amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ba04a4baecf2..91ae9121e2bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1644,9 +1644,9 @@ struct kfd_process_device
> *kfd_create_process_device_data(struct kfd_node *dev,
>   p->pdds[p->n_pdds++] = pdd;
>   if (kfd_dbg_is_per_vmid_supported(pdd->dev))
> pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
> - pdd->dev->adev,
> - false,
> - 0);
> + pdd->dev->adev,
> + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3),
> + 0);
>
>   /* Init idr used for memory handle translation */
>   idr_init(>alloc_idr);
> --
> 2.34.1



RE: [PATCH 1/6] drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

2023-07-07 Thread Kim, Jonathan
[AMD Official Use Only - General]

> -Original Message-
> From: Huang, JinHuiEric 
> Sent: Thursday, July 6, 2023 2:19 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kim, Jonathan ; Kim, Jonathan
> ; Huang, JinHuiEric 
> Subject: [PATCH 1/6] drm/amdkfd: add kfd2kgd debugger callbacks for GC
> v9.4.3
>
> From: Jonathan Kim 
>
> Implement the same callbacks as GC v9.4.2, plus the differences in
> the GC v9.4.3 HW spec, i.e. the xcc instances.
>
> Signed-off-by: Jonathan Kim 
> Signed-off-by: Eric Huang 
> ---
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  10 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h  |  30 
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 152
> +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c|   9 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  10 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c|   3 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  15 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  10 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_debug.c|   3 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |   2 +-
>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|   3 +-
>  .../gpu/drm/amd/include/kgd_kfd_interface.h   |   9 +-
>  12 files changed, 230 insertions(+), 26 deletions(-)
>  create mode 100644
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 60f9e027fb66..7d7eaed68531 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -23,6 +23,7 @@
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_amdkfd_arcturus.h"
>  #include "amdgpu_amdkfd_gfx_v9.h"
> +#include "amdgpu_amdkfd_aldebaran.h"
>  #include "gc/gc_9_4_2_offset.h"
>  #include "gc/gc_9_4_2_sh_mask.h"
>  #include 
> @@ -36,7 +37,7 @@
>   * initialize the debug mode registers after it has disabled GFX off during 
> the
>   * debug session.
>   */
> -static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>   bool restore_dbg_registers,
>   uint32_t vmid)
>  {
> @@ -50,7 +51,7 @@ static uint32_t
> kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
>  }
>
>  /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
> -static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device
> *adev,
> +uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
>   bool keep_trap_enabled,
>   uint32_t vmid)
>  {
> @@ -107,7 +108,7 @@ static uint32_t
> kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
>   return data;
>  }
>
> -static uint32_t kgd_aldebaran_set_wave_launch_mode(struct
> amdgpu_device *adev,
> +uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device
> *adev,
>   uint8_t wave_launch_mode,
>   uint32_t vmid)
>  {
> @@ -125,7 +126,8 @@ static uint32_t
> kgd_gfx_aldebaran_set_address_watch(
>   uint32_t watch_address_mask,
>   uint32_t watch_id,
>   uint32_t watch_mode,
> - uint32_t debug_vmid)
> + uint32_t debug_vmid,
> + uint32_t inst)
>  {
>   uint32_t watch_address_high;
>   uint32_t watch_address_low;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> new file mode 100644
> index ..ed349ff397bd
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included
> in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> 

[PATCH] drm/amdgpu: Increase IH soft ring size

2023-07-07 Thread Philip Yang
Retry faults are delegated to the IH soft ring and then processed by a
deferred worker. The current IH soft ring size of PAGE_SIZE can store 128
entries, which may overflow and drop retry faults, causing the HW to get
stuck because the dropped retry fault is never recovered.

Increase the IH soft ring size to the same size as the IH ring, and define
the macro IH_RING_SIZE to remove the duplicated constant.

Show a warning message if the IH soft ring overflows, because this should
not happen any more.

Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/ih_v6_0.c| 5 +++--
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 5 +++--
 7 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index fceb3b384955..51a0dbd2358a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 /**
  * amdgpu_ih_ring_write - write IV to the ring buffer
  *
+ * @adev: amdgpu_device pointer
  * @ih: ih ring to write to
  * @iv: the iv to write
  * @num_dw: size of the iv in dw
@@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
  * Writes an IV to the ring buffer using the CPU and increment the wptr.
  * Used for testing and delegating IVs to a software ring.
  */
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw)
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
 {
	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
	unsigned int i;
@@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+   } else {
+   dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+wptr, ih->rptr);
}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index dd1c2eded6b9..a8cf67f1f011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {
 int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
	unsigned ring_size, bool use_bus_addr);
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
- unsigned int num_dw);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
 int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih);
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 5273decc5753..fa6d0adcec20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
 struct amdgpu_iv_entry *entry,
 unsigned int num_dw)
 {
-	amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
+	amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(>irq.ih_soft_work);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index b02e1cef78a7..21d2e57cffe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -32,6 +32,7 @@
 #include "soc15_common.h"
 #include "ih_v6_0.h"
 
+#define IH_RING_SIZE   (256 * 1024)
 #define MAX_REARM_RETRY 10
 
 static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
@@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle)
 * use bus address for ih ring by psp bl */
use_bus_addr =
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
-	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
 
@@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle)
/* initialize ih control register offset */
ih_v6_0_init_register_offset(adev);
 
-   r = amdgpu_ih_ring_init(adev, 

[PATCH v3] drm/amdgpu: check whether smu is idle in sriov case

2023-07-07 Thread Danijel Slivka
Why:
If the reg mmMP1_SMN_C2PMSG_90 is programmed to 0x0 before
guest initialization, then modprobe amdgpu will fail at smu hw_init.
(The default mmMP1_SMN_C2PMSG_90 in a clean guest environment is 0x1.)

How to fix:
This patch checks whether the SMU is idle by sending it a test
message. If the SMU is idle, it will respond.

Signed-off-by: Danijel Slivka 
Signed-off-by: Nikola Prica 
Signed-off-by: Jingwen Chen 
Signed-off-by: pengzhou 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c|  9 +
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c| 40 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h|  2 +
 3 files changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index aa4a5498a12f..1568b9958150 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -184,6 +184,15 @@ int smu_v11_0_check_fw_status(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
uint32_t mp1_fw_flags;
+   int ret = 0;
+
+   if (amdgpu_sriov_vf(adev)) {
+   ret = smu_cmn_wait_smu_idle(smu);
+   if (ret) {
+   dev_err(adev->dev, "SMU is not idle\n");
+   return ret;
+   }
+   }
 
	mp1_fw_flags = RREG32_PCIE(MP1_Public |
				   (smnMP1_FIRMWARE_FLAGS & 0xffffffff));
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3ecb900e6ecd..e3c972984b2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
return res;
 }
 
+/**
+ * smu_cmn_wait_smu_idle -- wait for smu to become idle
+ * @smu: pointer to an SMU context
+ *
+ * Send SMU_MSG_TestMessage to check whether SMU is idle.
+ * If SMU is idle, it will respond.
+ * The returned parameter will be the param you pass + 1.
+ *
+ * Return 0 on success, -errno on error, indicating the execution
+ * status and result of the message being waited for. See
+ * __smu_cmn_reg2errno() for details of the -errno.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+   u32 reg;
+   u32 param = 0xff00011;
+   uint32_t read_arg;
+   int res, index;
+
+   index = smu_cmn_to_asic_specific_index(smu,
+  CMN2ASIC_MAPPING_MSG,
+  SMU_MSG_TestMessage);
+
+   __smu_cmn_send_msg(smu, index, param);
+   reg = __smu_cmn_poll_stat(smu);
+   res = __smu_cmn_reg2errno(smu, reg);
+
+   if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+   res && (res != -ETIME)) {
+   amdgpu_device_halt(smu->adev);
+   WARN_ON(1);
+   }
+
+   smu_cmn_read_arg(smu, &read_arg);
+   if (read_arg == param + 1)
+   return 0;
+   return res;
+}
+
+
 /**
  * smu_cmn_send_smc_msg_with_param -- send a message with parameter
  * @smu: pointer to an SMU context
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index d7cd358a53bd..65da886d6a8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context 
*smu,
 
 int smu_cmn_wait_for_response(struct smu_context *smu);
 
+int smu_cmn_wait_smu_idle(struct smu_context *smu);
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
   enum smu_cmn2asic_mapping_type type,
   uint32_t index);
-- 
2.25.1



Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Shashank Sharma



On 07/07/2023 14:28, Christian König wrote:



Am 07.07.23 um 12:02 schrieb Shashank Sharma:


On 07/07/2023 10:37, Christian König wrote:

Am 07.07.23 um 09:46 schrieb Shashank Sharma:


On 07/07/2023 09:24, Christian König wrote:



Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A memory queue descriptor (MQD) of a userqueue defines the queue in
the hw's context. As the MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 73 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device 
*dev, void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;

+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = &gfx_v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr 
*uq_mgr,

+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];

+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic 
MQD init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }


Sorry, I've just seen that now. Please don't have a 
copy_from_user() in the backend!


This is pure front end stuff which we shouldn't do in hw 
generation specific code.



This is a bit difficult to achieve, as you know:

- the whole reason we moved to the ptr/size based approach from the
fixed-MQD-structure approach is so that we can support multiple MQD
structures using the same UAPI.


- which means that in the amdgpu_userqueue.c layer (say front-end)
I do not know the right size of the MQD; it is independent of the IP.


- the correct size of MQD can only be known in IP specific 
functions which are in gfx_v11.c (back end).


- I may be able to achieve it by adding a new fptr get_mqd_size() 
which can return the right MQD size for me from backend IP 
function, and 
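
A rough sketch of that function-pointer idea (names are illustrative,
not the actual driver API):

/* Hypothetical shape: the IP backend reports its MQD size so the
 * common front end can do the copy_from_user() itself and hand the
 * backend an already-copied kernel buffer.
 */
struct amdgpu_userq_funcs_sketch {
	size_t (*get_mqd_size)(void);
	int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, void *mqd_kernel,
			  struct amdgpu_usermode_queue *queue);
};

static size_t gfx_v11_0_userq_get_mqd_size(void)
{
	return sizeof(struct drm_amdgpu_userq_mqd_gfx_v11_0);
}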

Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Christian König




Am 07.07.23 um 12:02 schrieb Shashank Sharma:


On 07/07/2023 10:37, Christian König wrote:

Am 07.07.23 um 09:46 schrieb Shashank Sharma:


On 07/07/2023 09:24, Christian König wrote:



Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A memory queue descriptor (MQD) of a userqueue defines the queue in
the hw's context. As the MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 73 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device 
*dev, void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;

+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = &gfx_v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr 
*uq_mgr,

+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];

+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic 
MQD init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }


Sorry, I've just seen that now. Please don't have a 
copy_from_user() in the backend!


This is pure front end stuff which we shouldn't do in hw generation 
specific code.



This is a bit difficult to achieve, as you know:

- the whole reason we moved to the ptr/size based approach from the
fixed-MQD-structure approach is so that we can support multiple MQD
structures using the same UAPI.


- which means that in the amdgpu_userqueue.c layer (say front-end)
I do not know the right size of the MQD; it is independent of the IP.


- the correct size of MQD can only be known in IP specific functions 
which are in gfx_v11.c (back end).


- I may be able to achieve it by adding a new fptr get_mqd_size() 
which can return the right MQD size for me from backend IP function, 
and then I can move this copy from user to 

Re: [PATCH libdrm v2] amdgpu: Use PRI?64 to format uint64_t

2023-07-07 Thread Christian König




Am 06.07.23 um 10:36 schrieb Geert Uytterhoeven:

On 32-bit:

 ../tests/amdgpu/amdgpu_stress.c: In function ‘alloc_bo’:
 ../tests/amdgpu/amdgpu_stress.c:178:49: warning: format ‘%lx’ expects 
argument of type ‘long unsigned int’, but argument 4 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size 
%lu\n",
~~^
%llx
num_buffers++, addr, domain, size);
   
 ../tests/amdgpu/amdgpu_stress.c:178:72: warning: format ‘%lu’ expects 
argument of type ‘long unsigned int’, but argument 6 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size 
%lu\n",
   ~~^
   %llu
num_buffers++, addr, domain, size);
 
 ../tests/amdgpu/amdgpu_stress.c: In function ‘submit_ib’:
 ../tests/amdgpu/amdgpu_stress.c:276:54: warning: format ‘%lx’ expects 
argument of type ‘long unsigned int’, but argument 5 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes 
took %lu usec\n",
 ~~^
 %llx
count, from, virtual[from], to, virtual[to], copied, delta / 1000);
 ~
 ../tests/amdgpu/amdgpu_stress.c:276:65: warning: format ‘%lx’ expects 
argument of type ‘long unsigned int’, but argument 7 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes 
took %lu usec\n",
~~^
%llx
count, from, virtual[from], to, virtual[to], copied, delta / 1000);
~~~
 ../tests/amdgpu/amdgpu_stress.c:276:70: warning: format ‘%lu’ expects 
argument of type ‘long unsigned int’, but argument 8 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes 
took %lu usec\n",
 ~~^
 %llu
count, from, virtual[from], to, virtual[to], copied, delta / 1000);
 ~~
 ../tests/amdgpu/amdgpu_stress.c:276:85: warning: format ‘%lu’ expects 
argument of type ‘long unsigned int’, but argument 9 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
   fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes 
took %lu usec\n",

~~^

%llu
count, from, virtual[from], to, virtual[to], copied, delta / 1000);
 
 ../tests/amdgpu/amdgpu_stress.c: In function ‘parse_size’:
 ../tests/amdgpu/amdgpu_stress.c:296:24: warning: format ‘%li’ expects 
argument of type ‘long int *’, but argument 3 has type ‘uint64_t *’ {aka ‘long 
long unsigned int *’} [-Wformat=]
   if (sscanf(optarg, "%li%1[kmgKMG]", &size, ext) < 1) {
   ~~^ ~
   %lli
 ../tests/amdgpu/amdgpu_stress.c: In function ‘main’:
 ../tests/amdgpu/amdgpu_stress.c:378:45: warning: format ‘%lu’ expects 
argument of type ‘long unsigned int’, but argument 3 has type ‘uint64_t’ {aka 
‘long long unsigned int’} [-Wformat=]
  fprintf(stderr, "Buffer size to small %lu\n", size);
~~^ 
%llu

Fix this by using the proper "PRI?64" format specifiers.

Fixes: d77ccdf3ba6f5a39 ("amdgpu: add amdgpu_stress utility v2")
Signed-off-by: Geert Uytterhoeven 
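
For reference, a minimal standalone illustration of the PRI?64 usage the
patch adopts (values are made up):

/* Illustration only: PRIx64/PRIu64 expand to the correct conversion
 * specifier on both 32-bit and 64-bit targets, avoiding -Wformat.
 */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x100000000ull;
	uint64_t size = 4096;

	printf("Allocated BO at 0x%" PRIx64 ", size %" PRIu64 "\n",
	       addr, size);
	return 0;
}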


Well generally good patch, but libdrm changes are now reviewed by merge 
request and not on the mailing list any more.


Regards,
Christian.


---
On Linux/amd64, the format strings in the resulting binary are
unchanged.

v2:
   - Use PRI?64 to unbreak 64-bit build.
---
  tests/amdgpu/amdgpu_stress.c | 9 +
  1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/amdgpu/amdgpu_stress.c b/tests/amdgpu/amdgpu_stress.c
index 5c5c88c5be985eb6..f919351e1f17d70b 100644
--- a/tests/amdgpu/amdgpu_stress.c
+++ 

Re: [PATCH] drm/amdgpu: correct the UCODE ID used for VCN 4.0.3 SRAM update

2023-07-07 Thread Lazar, Lijo




On 7/7/2023 5:19 PM, Lang Yu wrote:

On 07/07/ , Lazar, Lijo wrote:



On 7/7/2023 5:01 PM, Lang Yu wrote:

It uses the same UCODE ID (VCN0_RAM) but different cmd buffers
for all instances.

Fixes: e928b52c58dd ("drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead")

Signed-off-by: Lang Yu 
---
   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ce8c766dcc73..8ff814b6b656 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -778,7 +778,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, b
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
if (indirect)
-   amdgpu_vcn_psp_update_sram(adev, inst_idx);
+   amdgpu_vcn_psp_update_sram(adev, 0);


This doesn't pass the right buffer either. Could you revert the
two patches? I'm not seeing any simplification from them. Previously only
one psp API had to be called.


Oh, thanks. I got it ...

The purpose of psp_execute_load_ip_fw_cmd_buf is to serve other IPs;
they have the same use case as psp_update_vcn_sram.

How about just call psp_execute_load_ip_fw_cmd_buf in VCN directly?

Or revert the two patches and add some functions
like psp_update_AAA, psp_update_BBB, ... for Other IPs.



At least for now, I think VCN is the only case which requires an extra 
specific ucode API. All other ucodes are loaded during psp hw_init.


Thanks,
Lijo


Regards,
Lang



Thanks,
Lijo


	ring = &adev->vcn.inst[inst_idx].ring_enc[0];


Re: [PATCH] drm/amdgpu: correct the UCODE ID used for VCN 4.0.3 SRAM update

2023-07-07 Thread Lang Yu
On 07/07/ , Lazar, Lijo wrote:
> 
> 
> On 7/7/2023 5:01 PM, Lang Yu wrote:
> > It uses the same UCODE ID (VCN0_RAM) but different cmd buffers
> > for all instances.
> > 
> > Fixes: e928b52c58dd ("drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf 
> > instead")
> > 
> > Signed-off-by: Lang Yu 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
> > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> > index ce8c766dcc73..8ff814b6b656 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> > @@ -778,7 +778,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct 
> > amdgpu_device *adev, int inst_idx, b
> > UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
> > if (indirect)
> > -   amdgpu_vcn_psp_update_sram(adev, inst_idx);
> > +   amdgpu_vcn_psp_update_sram(adev, 0);
> 
> This doesn't pass the right buffer either. Could you revert the
> two patches? I'm not seeing any simplification from them. Previously only
> one psp API had to be called.

Oh, thanks. I got it ...

The purpose of psp_execute_load_ip_fw_cmd_buf is to serve other IPs;
they have the same use case as psp_update_vcn_sram.

How about just call psp_execute_load_ip_fw_cmd_buf in VCN directly?

Or revert the two patches and add some functions
like psp_update_AAA, psp_update_BBB, ... for Other IPs.
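
A rough sketch of that per-IP wrapper shape, mirroring the existing VCN
helper (psp_update_foo_sram and AMDGPU_UCODE_ID_FOO_RAM are made-up
names for a hypothetical IP):

/* Hypothetical sketch: each IP adds a thin wrapper that supplies its
 * own ucode ID and command buffer, keeping the generic PSP entry
 * point IP-agnostic.
 */
static int psp_update_foo_sram(struct amdgpu_device *adev,
			       uint64_t cmd_gpu_addr, uint32_t cmd_size)
{
	return psp_execute_load_ip_fw_cmd_buf(adev, NULL,
					      AMDGPU_UCODE_ID_FOO_RAM,
					      cmd_gpu_addr, cmd_size);
}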

Regards,
Lang


> Thanks,
> Lijo
> 
> >   ring = &adev->vcn.inst[inst_idx].ring_enc[0];


Re: [PATCH] [v2] drm/amdgpu: avoid integer overflow warning in amdgpu_device_resize_fb_bar()

2023-07-07 Thread Christian König

Am 07.07.23 um 13:11 schrieb Arnd Bergmann:

From: Arnd Bergmann 

On 32-bit architectures comparing a resource against a value larger than
U32_MAX can cause a warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1344:18: error: result of comparison of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare]
 res->start > 0x100000000ull)
 ~~~~~~~~~~ ^ ~~~~~~~~~~~~~~

As gcc does not warn about this in dead code, add an IS_ENABLED() check at
the start of the function. This will always return success but not actually
resize the BAR on 32-bit architectures without high memory, which is exactly
what we want here, as the driver can fall back to bank switching the VRAM
access.

Fixes: 31b8adab3247e ("drm/amdgpu: require a root bus window above 4GB for BAR 
resize")
Signed-off-by: Arnd Bergmann 


Reviewed-by: Christian König 


---
v2: return early instead of shutting up the warning with a cast and
running into a failure
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7f069e1731fee..fcf5f07c47751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1325,6 +1325,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device 
*adev)
u16 cmd;
int r;
  
+	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))

+   return 0;
+
/* Bypass for VF */
if (amdgpu_sriov_vf(adev))
return 0;




Re: [PATCH] drm/amdgpu: correct the UCODE ID used for VCN 4.0.3 SRAM update

2023-07-07 Thread Lazar, Lijo




On 7/7/2023 5:01 PM, Lang Yu wrote:

It uses the same UCODE ID (VCN0_RAM) but different cmd buffers
for all instances.

Fixes: e928b52c58dd ("drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead")

Signed-off-by: Lang Yu 
---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ce8c766dcc73..8ff814b6b656 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -778,7 +778,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, b
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
  
  	if (indirect)

-   amdgpu_vcn_psp_update_sram(adev, inst_idx);
+   amdgpu_vcn_psp_update_sram(adev, 0);


This doesn't pass the right buffer either. Could you revert
the two patches? I'm not seeing any simplification from them. Previously
only one psp API had to be called.


Thanks,
Lijo


>	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
  


[PATCH] drm/amdgpu: correct the UCODE ID used for VCN 4.0.3 SRAM update

2023-07-07 Thread Lang Yu
It uses the same UCODE ID (VCN0_RAM) but different cmd buffers
for all instances.

Fixes: e928b52c58dd ("drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead")

Signed-off-by: Lang Yu 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ce8c766dcc73..8ff814b6b656 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -778,7 +778,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device 
*adev, int inst_idx, b
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
 
if (indirect)
-   amdgpu_vcn_psp_update_sram(adev, inst_idx);
+   amdgpu_vcn_psp_update_sram(adev, 0);
 
	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
 
-- 
2.25.1



Re: [PATCH 2/2] drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead

2023-07-07 Thread Lang Yu
On 07/07/ , Lazar, Lijo wrote:
> 
> 
> On 6/29/2023 1:44 PM, Lang Yu wrote:
> > Ping.
> > 
> > On 06/27/ , Lang Yu wrote:
> > > Replace the old ones with psp_execute_load_ip_fw_cmd_buf.
> > > 
> > > Signed-off-by: Lang Yu 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 31 -
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 --
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c |  9 +++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
> > >   drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  4 +---
> > >   drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  4 +---
> > >   drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  4 +---
> > >   drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  4 +---
> > >   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c |  4 +---
> > >   9 files changed, 20 insertions(+), 44 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > > index a1cb541f315f..b61963112118 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > > @@ -2474,21 +2474,11 @@ int psp_execute_load_ip_fw_cmd_buf(struct 
> > > amdgpu_device *adev,
> > >   return ret;
> > >   }
> > > -static int psp_execute_non_psp_fw_load(struct psp_context *psp,
> > > -   struct amdgpu_firmware_info *ucode)
> > > +static inline
> > > +int psp_execute_non_psp_fw_load(struct psp_context *psp,
> > > + struct amdgpu_firmware_info *ucode)
> > >   {
> > > - int ret = 0;
> > > - struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
> > > -
> > > - ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
> > > - if (!ret) {
> > > - ret = psp_cmd_submit_buf(psp, ucode, cmd,
> > > -  psp->fence_buf_mc_addr);
> > > - }
> > > -
> > > - release_psp_cmd_buf(psp);
> > > -
> > > - return ret;
> > > + return psp_execute_load_ip_fw_cmd_buf(psp->adev, ucode, 0, 0, 0);
> > >   }
> > >   static int psp_load_smu_fw(struct psp_context *psp)
> > > @@ -2946,19 +2936,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
> > >   return ret;
> > >   }
> > > -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
> > > - uint64_t cmd_gpu_addr, int cmd_size)
> > > -{
> > > - struct amdgpu_firmware_info ucode = {0};
> > > -
> > > - ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
> > > - AMDGPU_UCODE_ID_VCN0_RAM;
> > > - ucode.mc_addr = cmd_gpu_addr;
> > > - ucode.ucode_size = cmd_size;
> > > -
> > > - return psp_execute_non_psp_fw_load(&adev->psp, &ucode);
> > > -}
> > > -
> > >   int psp_ring_cmd_submit(struct psp_context *psp,
> > >   uint64_t cmd_buf_mc_addr,
> > >   uint64_t fence_mc_addr,
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> > > index bd324fed6237..e49984a9d570 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> > > @@ -459,8 +459,6 @@ extern int psp_wait_for_spirom_update(struct 
> > > psp_context *psp, uint32_t reg_inde
> > >   uint32_t field_val, uint32_t mask, uint32_t 
> > > msec_timeout);
> > >   int psp_gpu_reset(struct amdgpu_device *adev);
> > > -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
> > > - uint64_t cmd_gpu_addr, int cmd_size);
> > >   int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev,
> > >  struct amdgpu_firmware_info *ucode,
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> > > index d37ebd4402ef..1805cd042d34 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> > > @@ -1257,3 +1257,12 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device 
> > > *adev)
> > >   return 0;
> > >   }
> > > +
> > > +int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx)
> > > +{
> > > + return psp_execute_load_ip_fw_cmd_buf(adev, NULL,
> > > + inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : AMDGPU_UCODE_ID_VCN0_RAM,
> > > + adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
> > > + 	(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
> > > + 		   (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
> > > +}
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> > > index 92d5534df5f4..3ac5ad91ed08 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> > > @@ -414,4 +414,6 @@ int amdgpu_vcn_ras_late_init(struct amdgpu_device 
> > > *adev,
> > >   struct ras_common_if *ras_block);
> > >   int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
> > > +int 

[PATCH] [v2] drm/amdgpu: avoid integer overflow warning in amdgpu_device_resize_fb_bar()

2023-07-07 Thread Arnd Bergmann
From: Arnd Bergmann 

On 32-bit architectures comparing a resource against a value larger than
U32_MAX can cause a warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1344:18: error: result of comparison
of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned
int') is always false [-Werror,-Wtautological-constant-out-of-range-compare]
        res->start > 0x100000000ull)
        ~~~~~~~~~~ ^ ~~~~~~~~~~~~~~

As gcc does not warn about this in dead code, add an IS_ENABLED() check at
the start of the function. This will always return success but not actually
resize the BAR on 32-bit architectures without high memory, which is exactly
what we want here, as the driver can fall back to bank switching for VRAM
access.

Fixes: 31b8adab3247e ("drm/amdgpu: require a root bus window above 4GB for BAR 
resize")
Signed-off-by: Arnd Bergmann 
---
v2: return early instead of shutting up the warning with a cast and
running into a failure
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7f069e1731fee..fcf5f07c47751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1325,6 +1325,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device 
*adev)
u16 cmd;
int r;
 
+   if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+   return 0;
+
/* Bypass for VF */
if (amdgpu_sriov_vf(adev))
return 0;
-- 
2.39.2
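The guard pattern generalizes: a minimal sketch of how the IS_ENABLED()
early return keeps a >4GB comparison both correct and warning-free on
32-bit builds. The function name is illustrative; per the commit message,
compilers do not warn about the now-dead comparison:

	static int resize_bar_sketch(struct resource *res)
	{
		/* Without CONFIG_PHYS_ADDR_T_64BIT the BAR cannot be placed
		 * above 4GB anyway, so succeed without resizing; the compare
		 * below then sits in dead code on 32-bit builds.
		 */
		if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
			return 0;

		if (res->start > 0x100000000ull)
			return -ENODEV;
		return 0;
	}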



Re: [PATCH v2] drm/amdgpu: check whether smu is idle in sriov case

2023-07-07 Thread Lazar, Lijo




On 7/7/2023 3:47 PM, Danijel Slivka wrote:

Why:
If the reg mmMP1_SMN_C2PMSG_90 is being programmed to 0x0 before
guest initialization, then modprobe amdgpu will fail at smu hw_init.
(the default mmMP1_SMN_C2PMSG_90 in a clean guest environment is 0x1).

A response to the FW message doesn't mean the SMU is idle. It is 
probably only a check that the FW is ready.


Instead of introducing a new ppt function, move this implementation to 
check_fw_status(). For the VF case, the test message may be sent to 
ascertain that the FW is ready.


Thanks,
Lijo


How to fix:
this patch is to check whether smu is idle by sending a test
message to smu. If smu is idle, it will respond.

Signed-off-by: Danijel Slivka 
Signed-off-by: Nikola Prica 
Signed-off-by: Jingwen Chen 
Signed-off-by: pengzhou 
---
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  8 
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  1 +
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c| 40 +++
  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h|  2 +
  5 files changed, 58 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..63ea4cd32ece 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
}
}
  
+	if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {

+   ret = smu->ppt_funcs->wait_smu_idle(smu);
+   if (ret) {
+   dev_err(adev->dev, "SMU is not idle\n");
+   return ret;
+   }
+   }
+
/*
 * Send msg GetDriverIfVersion to check if the return value is equal
 * with DRIVER_IF_VERSION of smc header.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 6e2069dcb6b9..1bf87ad30d93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -926,6 +926,13 @@ struct pptable_funcs {
 */
int (*check_fw_status)(struct smu_context *smu);
  
+	/**

+* @wait_smu_idle: wait for SMU idle status.
+*
+* Return: Zero if check passes, negative errno on failure.
+*/
+   int (*wait_smu_idle)(struct smu_context *smu);
+
/**
 * @set_mp1_state: put SMU into a correct state for comming
 * resume from runpm or gpu reset.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index c94d825a871b..3745e4f96433 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.init_power = smu_v11_0_init_power,
.fini_power = smu_v11_0_fini_power,
.check_fw_status = smu_v11_0_check_fw_status,
+   .wait_smu_idle = smu_cmn_wait_smu_idle,
.setup_pptable = navi10_setup_pptable,
.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
.check_fw_version = smu_v11_0_check_fw_version,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3ecb900e6ecd..e3c972984b2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
return res;
  }
  
+/**

+ * smu_cmn_wait_smu_idle -- wait for smu to become idle
+ * @smu: pointer to an SMU context
+ *
+ * Send SMU_MSG_TestMessage to check whether SMU is idle.
+ * If SMU is idle, it will respond.
+ * The returned parameter will be the param you pass + 1.
+ *
+ * Return 0 on success, -errno on error, indicating the execution
+ * status and result of the message being waited for. See
+ * __smu_cmn_reg2errno() for details of the -errno.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+   u32 reg;
+   u32 param = 0xff00011;
+   uint32_t read_arg;
+   int res, index;
+
+   index = smu_cmn_to_asic_specific_index(smu,
+  CMN2ASIC_MAPPING_MSG,
+  SMU_MSG_TestMessage);
+
+   __smu_cmn_send_msg(smu, index, param);
+   reg = __smu_cmn_poll_stat(smu);
+   res = __smu_cmn_reg2errno(smu, reg);
+
+   if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+   res && (res != -ETIME)) {
+   amdgpu_device_halt(smu->adev);
+   WARN_ON(1);
+   }
+
+   smu_cmn_read_arg(smu, &read_arg);
+   if (read_arg == param + 1)
+   return 0;
+   return res;
+}
+
+
  /**
   * smu_cmn_send_smc_msg_with_param -- send a message with parameter
   * @smu: pointer to an SMU context

Re: [PATCH 2/2] drm/amdgpu: use psp_execute_load_ip_fw_cmd_buf instead

2023-07-07 Thread Lazar, Lijo




On 6/29/2023 1:44 PM, Lang Yu wrote:

Ping.

On 06/27/ , Lang Yu wrote:

Replace the old ones with psp_execute_load_ip_fw_cmd_buf.

Signed-off-by: Lang Yu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 31 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c |  9 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  4 +---
  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  4 +---
  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  4 +---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  4 +---
  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c |  4 +---
  9 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a1cb541f315f..b61963112118 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2474,21 +2474,11 @@ int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device 
*adev,
return ret;
  }
  
-static int psp_execute_non_psp_fw_load(struct psp_context *psp,

- struct amdgpu_firmware_info *ucode)
+static inline
+int psp_execute_non_psp_fw_load(struct psp_context *psp,
+   struct amdgpu_firmware_info *ucode)
  {
-   int ret = 0;
-   struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
-
-   ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
-   if (!ret) {
-   ret = psp_cmd_submit_buf(psp, ucode, cmd,
-psp->fence_buf_mc_addr);
-   }
-
-   release_psp_cmd_buf(psp);
-
-   return ret;
+   return psp_execute_load_ip_fw_cmd_buf(psp->adev, ucode, 0, 0, 0);
  }
  
  static int psp_load_smu_fw(struct psp_context *psp)

@@ -2946,19 +2936,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
return ret;
  }
  
-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,

-   uint64_t cmd_gpu_addr, int cmd_size)
-{
-   struct amdgpu_firmware_info ucode = {0};
-
-   ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
-   AMDGPU_UCODE_ID_VCN0_RAM;
-   ucode.mc_addr = cmd_gpu_addr;
-   ucode.ucode_size = cmd_size;
-
-   return psp_execute_non_psp_fw_load(&adev->psp, &ucode);
-}
-
  int psp_ring_cmd_submit(struct psp_context *psp,
uint64_t cmd_buf_mc_addr,
uint64_t fence_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index bd324fed6237..e49984a9d570 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -459,8 +459,6 @@ extern int psp_wait_for_spirom_update(struct psp_context 
*psp, uint32_t reg_inde
uint32_t field_val, uint32_t mask, uint32_t 
msec_timeout);
  
  int psp_gpu_reset(struct amdgpu_device *adev);

-int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
-   uint64_t cmd_gpu_addr, int cmd_size);
  
  int psp_execute_load_ip_fw_cmd_buf(struct amdgpu_device *adev,

   struct amdgpu_firmware_info *ucode,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index d37ebd4402ef..1805cd042d34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1257,3 +1257,12 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
  
  	return 0;

  }
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx)
+{
+   return psp_execute_load_ip_fw_cmd_buf(adev, NULL,
+   inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : AMDGPU_UCODE_ID_VCN0_RAM,
+   adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+	(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+		   (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 92d5534df5f4..3ac5ad91ed08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -414,4 +414,6 @@ int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
  int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
  
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx);

+
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index c975aed2f6c7..74cd1522067c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -881,9 +881,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device 
*adev, bool indirect)
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
  
  	if (indirect)

-   psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
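The call-site simplification the series aims for, sketched from the
vcn_v2_0 hunk above (old call first, new helper second):

	/* before: the caller computes and passes the buffer explicitly */
	psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
			    (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
				       (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));

	/* after: the helper derives the buffer from the instance index */
	amdgpu_vcn_psp_update_sram(adev, 0);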

[PATCH v2] drm/amdgpu: check whether smu is idle in sriov case

2023-07-07 Thread Danijel Slivka
Why:
If the reg mmMP1_SMN_C2PMSG_90 is being programmed to 0x0 before
guest initialization, then modprobe amdgpu will fail at smu hw_init.
(the default mmMP1_SMN_C2PMSG_90 in a clean guest environment is 0x1).

How to fix:
this patch is to check whether smu is idle by sending a test
message to smu. If smu is idle, it will respond.

Signed-off-by: Danijel Slivka 
Signed-off-by: Nikola Prica 
Signed-off-by: Jingwen Chen 
Signed-off-by: pengzhou 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |  8 
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c| 40 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h|  2 +
 5 files changed, 58 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..63ea4cd32ece 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
}
}
 
+   if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {
+   ret = smu->ppt_funcs->wait_smu_idle(smu);
+   if (ret) {
+   dev_err(adev->dev, "SMU is not idle\n");
+   return ret;
+   }
+   }
+
/*
 * Send msg GetDriverIfVersion to check if the return value is equal
 * with DRIVER_IF_VERSION of smc header.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 6e2069dcb6b9..1bf87ad30d93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -926,6 +926,13 @@ struct pptable_funcs {
 */
int (*check_fw_status)(struct smu_context *smu);
 
+   /**
+* @wait_smu_idle: wait for SMU idle status.
+*
+* Return: Zero if check passes, negative errno on failure.
+*/
+   int (*wait_smu_idle)(struct smu_context *smu);
+
/**
 * @set_mp1_state: put SMU into a correct state for comming
 * resume from runpm or gpu reset.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index c94d825a871b..3745e4f96433 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.init_power = smu_v11_0_init_power,
.fini_power = smu_v11_0_fini_power,
.check_fw_status = smu_v11_0_check_fw_status,
+   .wait_smu_idle = smu_cmn_wait_smu_idle,
.setup_pptable = navi10_setup_pptable,
.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
.check_fw_version = smu_v11_0_check_fw_version,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3ecb900e6ecd..e3c972984b2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
return res;
 }
 
+/**
+ * smu_cmn_wait_smu_idle -- wait for smu to become idle
+ * @smu: pointer to an SMU context
+ *
+ * Send SMU_MSG_TestMessage to check whether SMU is idle.
+ * If SMU is idle, it will respond.
+ * The returned parameter will be the param you pass + 1.
+ *
+ * Return 0 on success, -errno on error, indicating the execution
+ * status and result of the message being waited for. See
+ * __smu_cmn_reg2errno() for details of the -errno.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+   u32 reg;
+   u32 param = 0xff00011;
+   uint32_t read_arg;
+   int res, index;
+
+   index = smu_cmn_to_asic_specific_index(smu,
+  CMN2ASIC_MAPPING_MSG,
+  SMU_MSG_TestMessage);
+
+   __smu_cmn_send_msg(smu, index, param);
+   reg = __smu_cmn_poll_stat(smu);
+   res = __smu_cmn_reg2errno(smu, reg);
+
+   if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+   res && (res != -ETIME)) {
+   amdgpu_device_halt(smu->adev);
+   WARN_ON(1);
+   }
+
+   smu_cmn_read_arg(smu, &read_arg);
+   if (read_arg == param + 1)
+   return 0;
+   return res;
+}
+
+
 /**
  * smu_cmn_send_smc_msg_with_param -- send a message with parameter
  * @smu: pointer to an SMU context
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index d7cd358a53bd..65da886d6a8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context 
*smu,
 
 int 

Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Shashank Sharma



On 07/07/2023 10:37, Christian König wrote:

Am 07.07.23 um 09:46 schrieb Shashank Sharma:


On 07/07/2023 09:24, Christian König wrote:



Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 73 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device 
*dev, void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = _v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr 
*uq_mgr,

+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = 
&adev->mqds[AMDGPU_HW_IP_GFX];

+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic MQD 
init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }


Sorry, I've just seen that now. Please don't have a copy_from_user() 
in the backend!


This is pure front end stuff which we shouldn't do in hw generation 
specific code.



This is a bit difficult to achieve, as you know:

- the whole reason we moved to ptr/size based approach from 
fix-mqd-structure approach is so that we can support multiple MQD 
structures using the same UAPI.


- which means that in the amdgpu_userqueue.c layer (say, the front-end) 
I do not know the right size of the MQD; it's independent of the IP.


- the correct size of MQD can only be known in IP specific functions 
which are in gfx_v11.c (back end).


- I may be able to achieve it by adding a new fptr get_mqd_size() 
which can return the right MQD size from the backend IP function, and 
then I can move this copy_from_user() to the front-end. Does that 
sound like a good idea to you?


Just 
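A sketch of the proposed get_mqd_size() idea; the callback name and the
struct shape here are hypothetical, not code from the series:

	struct amdgpu_userq_funcs_sketch {
		/* backend reports its MQD size so the frontend can copy */
		size_t (*get_mqd_size)(struct amdgpu_userq_mgr *uq_mgr);
		int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, void *mqd,
				  struct amdgpu_usermode_queue *queue);
	};

	static size_t gfx_v11_0_userq_get_mqd_size(struct amdgpu_userq_mgr *uq_mgr)
	{
		return sizeof(struct drm_amdgpu_userq_mqd_gfx_v11_0);
	}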

Re: [PATCH v5 08/10] drm/amdgpu: generate doorbell index for userqueue

2023-07-07 Thread Shashank Sharma



On 07/07/2023 09:57, Christian König wrote:

Am 07.07.23 um 09:39 schrieb Shashank Sharma:


On 07/07/2023 09:15, Christian König wrote:

Am 06.07.23 um 14:36 schrieb Shashank Sharma:

The userspace sends us the doorbell object and the relative doorbell
index in the object to be used for the usermode queue, but the FW
expects the absolute doorbell index on the PCI BAR in the MQD. This
patch adds a function to convert this relative doorbell index to
absolute doorbell index.

This patch is dependent on the doorbell manager series:
Link: https://patchwork.freedesktop.org/series/115802/

V5: Fix the db object reference leak (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 34 
+++

  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    |  1 +
  2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index bb774144c372..61064266c4f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -32,6 +32,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr 
*uq_mgr, int qid)

  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static uint64_t
+amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ struct drm_file *filp,
+ uint32_t doorbell_offset)
+{
+    struct drm_gem_object *gobj;
+    struct amdgpu_bo *db_bo;
+    uint64_t index;
+
+    gobj = drm_gem_object_lookup(filp, queue->doorbell_handle);
+    if (gobj == NULL) {
+    DRM_ERROR("Can't find GEM object for doorbell\n");
+    return -EINVAL;
+    }
+
+    db_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+    drm_gem_object_put(gobj);
+
+    index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_bo, 
doorbell_offset);


This can only be done with the doorbell BO locked and as soon as you 
unlock it the value becomes invalid unless you pin the BO.


Which means I need to use create_bo_kernel() for doorbell BO's or 
specifically pin it while creating it ?


For now I think you need to pin it when amdgpu_userqueue_create() is 
called and unpin it when the userqueue is destroyed again.


It's probably a good idea to not use amdgpu_bo_create_kernel() for the 
MQD and context BO either, but rather explicitly pin it during queue 
create as well.



Noted, will do that.

- Shashank



Christian.



- Shashank


Regards,
Christian.


+    amdgpu_bo_unref(&db_bo);
+    DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", 
index);

+    return index;
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
@@ -64,6 +89,7 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
  const struct amdgpu_userq_funcs *uq_funcs;
  struct amdgpu_usermode_queue *queue;
+    uint64_t index;
  int qid, r = 0;
    mutex_lock(&uq_mgr->userq_mutex);
@@ -87,6 +113,14 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  queue->flags = args->in.flags;
  queue->vm = &fpriv->vm;
  +    /* Convert relative doorbell offset into absolute doorbell 
index */
+    index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, 
filp, args->in.doorbell_offset);

+    if (index == (uint64_t)-EINVAL) {
+    DRM_ERROR("Failed to get doorbell for queue\n");
+    goto unlock;
+    }
+    queue->doorbell_index = index;
+
  r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
  if (r) {
  DRM_ERROR("Failed to create Queue\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index afaeecb9940a..8edb020683a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6719,6 +6719,7 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  userq_props.queue_size = mqd_user.queue_size;
  userq_props.hqd_base_gpu_addr = mqd_user.queue_va;
  userq_props.mqd_gpu_addr = queue->mqd.gpu_addr;
+    userq_props.doorbell_index = queue->doorbell_index;
  userq_props.use_doorbell = true;
    r = mqd_gfx_generic->init_mqd(adev, (void 
*)queue->mqd.cpu_ptr, &userq_props);
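A sketch of the pinning Christian asks for: keep the doorbell BO pinned for
the queue's lifetime so the computed index stays valid. Error paths are
trimmed, and AMDGPU_GEM_DOMAIN_DOORBELL is an assumption taken from the
doorbell manager series this depends on:

	r = amdgpu_bo_reserve(db_bo, true);
	if (r)
		goto err_put;
	r = amdgpu_bo_pin(db_bo, AMDGPU_GEM_DOMAIN_DOORBELL);
	if (!r)
		index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_bo,
						     doorbell_offset);
	amdgpu_bo_unreserve(db_bo);
	/* ... and amdgpu_bo_unpin(db_bo) when the userqueue is destroyed */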






Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Christian König

Am 07.07.23 um 09:46 schrieb Shashank Sharma:


On 07/07/2023 09:24, Christian König wrote:



Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 73 
+++

  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, 
void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = _v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = 
&adev->mqds[AMDGPU_HW_IP_GFX];

+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic MQD 
init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }


Sorry, I've just seen that now. Please don't have a copy_from_user() 
in the backend!


This is pure front end stuff which we shouldn't do in hw generation 
specific code.



This is a bit difficult to achieve, as you know:

- the whole reason we moved to ptr/size based approach from 
fix-mqd-structure approach is so that we can support multiple MQD 
structures using the same UAPI.


- which means that in the amdgpu_userqueue.c layer (say, the front-end) 
I do not know the right size of the MQD; it's independent of the IP.


- the correct size of MQD can only be known in IP specific functions 
which are in gfx_v11.c (back end).


- I may be able to achieve it by adding a new fptr get_mqd_size() 
which can return the right MQD size from the backend IP function, and 
then I can move this copy_from_user() to the front-end. Does that 
sound like a good idea to you?


Just use memdup_user() in the frontend. Allocating 

Re: [PATCH 1/2] drm/amdgpu/gfx9: move update_spm_vmid() out of rlc_init()

2023-07-07 Thread Christian König

Am 06.07.23 um 20:55 schrieb Alex Deucher:

rlc_init() is part of sw_init() so it should not touch hardware.
Additionally, calling the rlc update_spm_vmid() callback
directly invokes a gfx on/off cycle which could result in
powergating being enabled before hw init is complete.  Split
update_spm_vmid() into an internal implementation for local
use without gfxoff interaction and then the rlc callback
which includes gfxoff handling.  lbpw_init also touches
hardware, so move that to rlc_resume as well.

Signed-off-by: Alex Deucher 


Acked-by: Christian König  for the series.


---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 34 ---
  1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d654bdd2037c9..7d992e4730db1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -762,6 +762,8 @@ static void gfx_v9_0_query_ras_error_count(struct 
amdgpu_device *adev,
  static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 void *inject_if, uint32_t instance_mask);
  static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned vmid);
  
  static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,

uint64_t queue_mask)
@@ -1669,22 +1671,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
  
-	switch (adev->ip_versions[GC_HWIP][0]) {

-   case IP_VERSION(9, 2, 2):
-   case IP_VERSION(9, 1, 0):
-   gfx_v9_0_init_lbpw(adev);
-   break;
-   case IP_VERSION(9, 4, 0):
-   gfx_v9_4_init_lbpw(adev);
-   break;
-   default:
-   break;
-   }
-
-   /* init spm vmid with 0xf */
-   if (adev->gfx.rlc.funcs->update_spm_vmid)
-   adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
-
return 0;
  }
  
@@ -2944,12 +2930,14 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)

switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
+   gfx_v9_0_init_lbpw(adev);
if (amdgpu_lbpw == 0)
gfx_v9_0_enable_lbpw(adev, false);
else
gfx_v9_0_enable_lbpw(adev, true);
break;
case IP_VERSION(9, 4, 0):
+   gfx_v9_4_init_lbpw(adev);
if (amdgpu_lbpw > 0)
gfx_v9_0_enable_lbpw(adev, true);
else
@@ -2959,6 +2947,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
break;
}
  
+	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);

+
adev->gfx.rlc.funcs->start(adev);
  
  	return 0;

@@ -4883,12 +4873,11 @@ static int gfx_v9_0_update_gfx_clock_gating(struct 
amdgpu_device *adev,
return 0;
  }
  
-static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)

+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned vmid)
  {
u32 reg, data;
  
-	amdgpu_gfx_off_ctrl(adev, false);

-
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
@@ -4902,6 +4891,13 @@ static void gfx_v9_0_update_spm_vmid(struct 
amdgpu_device *adev, unsigned vmid)
WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
else
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+}
+
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+   amdgpu_gfx_off_ctrl(adev, false);
+
+   gfx_v9_0_update_spm_vmid_internal(adev, vmid);
  
  	amdgpu_gfx_off_ctrl(adev, true);

  }
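The shape of the split, reduced to a sketch (names are generic, not the
patch): the _internal variant only touches registers, while the public
callback brackets it with the gfxoff control:

	static void update_spm_vmid_internal_sketch(struct amdgpu_device *adev,
						    unsigned vmid)
	{
		/* register reads/writes only; the caller keeps GFX awake */
	}

	static void update_spm_vmid_sketch(struct amdgpu_device *adev, unsigned vmid)
	{
		amdgpu_gfx_off_ctrl(adev, false);	/* hold GFX out of gfxoff */
		update_spm_vmid_internal_sketch(adev, vmid);
		amdgpu_gfx_off_ctrl(adev, true);
	}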




Re: [PATCH] drm/amdgpu: Always emit GDS switch when GDS/GWS/OA is used

2023-07-07 Thread Christian König

Am 07.07.23 um 09:28 schrieb Friedrich Vock:

Hi Christian,

On 07.07.23 08:56, Christian König wrote:



Am 07.07.23 um 08:28 schrieb Friedrich Vock:

During gfxoff, the per-VMID GDS registers are reset and not restored
afterwards.


Hui? Since when? Those registers should be part of the saved ones.

Have you found that by observation?


yes. I tested this on my RX 6700 XT and the Steam Deck (Vangogh). In the
bug report I linked, a test program using GWS I developed hangs because
of this.

The hang occurs as soon as the kernel re-uses a VMID on which GWS was
already used once. In the hung state, inspecting the per-VMID GWS
registers shows that the values have been reset to 0.
The hang does not occur when gfxoff is disabled.

Even without causing hangs, you can confirm the behaviour by doing the
following:
1. Disable gfxoff.
2. Set some GWS registers.
3. Enable gfxoff and wait a bit.
4. Disable gfxoff and read the registers again. The GWS registers have
been reset.

I performed this test for the GDS_BASE/SIZE registers and it seems these
aren't affected, so it's only GWS that is buggy here.


That's most likely a bug in the FW then. I'm going to ask around internally.


I should probably make a v2 that combines the behaviour before this
patch for GDS and OA, and the patched behaviour for GWS.


Yeah, that sounds like a good idea to me. But let me ping the fw teams 
first.




I'm not aware of userspace using GWS (yet, I had some ideas for using it
in RADV which is what I've been writing these tests for),
so perhaps the Cc to stable can also be omitted.


Depends on what the fw teams says. As far as I know GWS has never been 
used widely on Linux.


Could be that they say there is a hw bug and we deprecated it for this 
generation, or it's simply not handled by the fw and the driver needs to 
take care of this (like this patch does) or whatever.


Thanks for the notice,
Christian.



Thanks,
Friedrich



Thanks,
Christian.



  The kernel needs to emit a GDS switch to manually update the
GWS registers in this case. Since gfxoff can happen between any two
submissions and the kernel has no way of knowing, emit the GDS switch
before every submission.

Fixes: 56b0989e29 ("drm/amdgpu: fix GDS/GWS/OA switch handling")
Cc: sta...@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2530
Signed-off-by: Friedrich Vock 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 22 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 --
  3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..de73797e9279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,24 +165,17 @@ bool amdgpu_vmid_had_gpu_reset(struct
amdgpu_device *adev,
  atomic_read(&adev->gpu_reset_counter);
  }

-/* Check if we need to switch to another set of resources */
-static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
-  struct amdgpu_job *job)
-{
-    return id->gds_base != job->gds_base ||
-    id->gds_size != job->gds_size ||
-    id->gws_base != job->gws_base ||
-    id->gws_size != job->gws_size ||
-    id->oa_base != job->oa_base ||
-    id->oa_size != job->oa_size;
-}
-
  /* Check if the id is compatible with the job */
  static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
 struct amdgpu_job *job)
  {
  return  id->pd_gpu_addr == job->vm_pd_addr &&
-    !amdgpu_vmid_gds_switch_needed(id, job);
+    id->gds_base == job->gds_base &&
+    id->gds_size == job->gds_size &&
+    id->gws_base == job->gws_base &&
+    id->gws_size == job->gws_size &&
+    id->oa_base == job->oa_base &&
+    id->oa_size == job->oa_size;
  }

  /**
@@ -434,7 +427,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct
amdgpu_ring *ring,
  list_move_tail(&id->list, &id_mgr->ids_lru);
  }

-    job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
  if (job->vm_needs_flush) {
  id->flushed_updates = amdgpu_vm_tlb_seq(vm);
  dma_fence_put(id->last_flush);
@@ -503,7 +495,7 @@ void amdgpu_vmid_free_reserved(struct
amdgpu_device *adev,
   * @vmhub: vmhub type
   * @vmid: vmid number to use
   *
- * Reset saved GDW, GWS and OA to force switch on next flush.
+ * Reset saved GDS, GWS and OA data.
   */
  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
 unsigned vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..2898508b1ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -53,7 +53,6 @@ struct amdgpu_job {
  uint32_t    preamble_status;
  uint32_t    preemption_status;
  bool    
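One possible shape for the v2 discussed above: keep the compare for GDS and
OA, but treat GWS as always needing a switch because gfxoff resets the
per-VMID GWS registers. This is a hypothetical sketch, not the actual v2:

	static bool gds_switch_needed_sketch(struct amdgpu_vmid *id,
					     struct amdgpu_job *job)
	{
		return id->gds_base != job->gds_base ||
		       id->gds_size != job->gds_size ||
		       id->oa_base != job->oa_base ||
		       id->oa_size != job->oa_size ||
		       /* GWS registers may have been reset by gfxoff */
		       job->gws_size != 0;
	}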

Re: [PATCH v5 08/10] drm/amdgpu: generate doorbell index for userqueue

2023-07-07 Thread Christian König

Am 07.07.23 um 09:39 schrieb Shashank Sharma:


On 07/07/2023 09:15, Christian König wrote:

Am 06.07.23 um 14:36 schrieb Shashank Sharma:

The userspace sends us the doorbell object and the relative doorbell
index in the object to be used for the usermode queue, but the FW
expects the absolute doorbell index on the PCI BAR in the MQD. This
patch adds a function to convert this relative doorbell index to
absolute doorbell index.

This patch is dependent on the doorbell manager series:
Link: https://patchwork.freedesktop.org/series/115802/

V5: Fix the db object reference leak (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 34 
+++

  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    |  1 +
  2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index bb774144c372..61064266c4f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -32,6 +32,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr 
*uq_mgr, int qid)

  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static uint64_t
+amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ struct drm_file *filp,
+ uint32_t doorbell_offset)
+{
+    struct drm_gem_object *gobj;
+    struct amdgpu_bo *db_bo;
+    uint64_t index;
+
+    gobj = drm_gem_object_lookup(filp, queue->doorbell_handle);
+    if (gobj == NULL) {
+    DRM_ERROR("Can't find GEM object for doorbell\n");
+    return -EINVAL;
+    }
+
+    db_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+    drm_gem_object_put(gobj);
+
+    index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_bo, 
doorbell_offset);


This can only be done with the doorbell BO locked and as soon as you 
unlock it the value becomes invalid unless you pin the BO.


Which means I need to use create_bo_kernel() for doorbell BO's or 
specifically pin it while creating it ?


For now I think you need to pin it when amdgpu_userqueue_create() is 
called and unpin it when the userqueue is destroyed again.


It's probably a good idea to not use amdgpu_bo_create_kernel() for the 
MQD and context BO either, but rather explicitly pin it during queue 
create as well.


Christian.



- Shashank


Regards,
Christian.


+    amdgpu_bo_unref(&db_bo);
+    DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", 
index);

+    return index;
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
@@ -64,6 +89,7 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
  const struct amdgpu_userq_funcs *uq_funcs;
  struct amdgpu_usermode_queue *queue;
+    uint64_t index;
  int qid, r = 0;
    mutex_lock(&uq_mgr->userq_mutex);
@@ -87,6 +113,14 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  queue->flags = args->in.flags;
  queue->vm = &fpriv->vm;
  +    /* Convert relative doorbell offset into absolute doorbell 
index */
+    index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, 
filp, args->in.doorbell_offset);

+    if (index == (uint64_t)-EINVAL) {
+    DRM_ERROR("Failed to get doorbell for queue\n");
+    goto unlock;
+    }
+    queue->doorbell_index = index;
+
  r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
  if (r) {
  DRM_ERROR("Failed to create Queue\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index afaeecb9940a..8edb020683a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6719,6 +6719,7 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  userq_props.queue_size = mqd_user.queue_size;
  userq_props.hqd_base_gpu_addr = mqd_user.queue_va;
  userq_props.mqd_gpu_addr = queue->mqd.gpu_addr;
+    userq_props.doorbell_index = queue->doorbell_index;
  userq_props.use_doorbell = true;
    r = mqd_gfx_generic->init_mqd(adev, (void 
*)queue->mqd.cpu_ptr, &userq_props);






Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Shashank Sharma



On 07/07/2023 09:24, Christian König wrote:



Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
    - No need to reserve the bo for MQD (Christian).
    - Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    | 73 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, 
void *data,

  return r;
  }
  +extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;
+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+    int maj;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+    /* We support usermode queue only for GFX V11 as of now */
+    maj = IP_VERSION_MAJ(version);
+    if (maj == 11)
+    uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
struct amdgpu_device *adev)

  {
  mutex_init(&userq_mgr->userq_mutex);
  idr_init_base(&userq_mgr->userq_idr, 1);
  userq_mgr->adev = adev;
  +    amdgpu_userqueue_setup_gfx(userq_mgr);
  return 0;
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version 
gfx_v11_0_ip_block =

  .rev = 0,
  .funcs = _v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+  struct drm_amdgpu_userq_in *args_in,
+  struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];
+    struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+    struct amdgpu_mqd_prop userq_props;
+    int r;
+
+    /* Incoming MQD parameters from userspace to be saved here */
+    memset(&mqd_user, 0, sizeof(mqd_user));
+
+    /* Structure to initialize MQD for userqueue using generic MQD 
init function */

+    memset(&userq_props, 0, sizeof(userq_props));
+
+    if (args_in->mqd_size != sizeof(struct 
drm_amdgpu_userq_mqd_gfx_v11_0)) {

+    DRM_ERROR("MQD size mismatch\n");
+    return -EINVAL;
+    }
+
+    if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), 
args_in->mqd_size)) {

+    DRM_ERROR("Failed to get user MQD\n");
+    return -EFAULT;
+    }


Sorry, I've just seen that now. Please don't have a copy_from_user() 
in the backend!


This is pure front end stuff which we shouldn't do in hw generation 
specific code.



This is a bit difficult to achieve, as you know:

- the whole reason we moved to ptr/size based approach from 
fix-mqd-structure approach is so that we can support multiple MQD 
structures using the same UAPI.


- which means that in the amdgpu_userqueue.c layer (say, the front-end) I 
do not know the right size of the MQD; it's independent of the IP.


- the correct size of MQD can only be known in IP specific functions 
which are in gfx_v11.c (back end).


- I may be able to achieve it by adding a new fptr get_mqd_size() which 
can return the right MQD size from the backend IP function, and then 
I can move this copy_from_user() to the front-end. Does that sound like 
a good idea to you?


- Shashank


Regards,
Christian.


+
+    /* Create BO for actual Userqueue MQD now */
+    r = 

Re: [PATCH v5 09/10] drm/amdgpu: cleanup leftover queues

2023-07-07 Thread Shashank Sharma



On 07/07/2023 09:17, Christian König wrote:



Am 06.07.23 um 14:36 schrieb Shashank Sharma:

This patch adds code to cleanup any leftover userqueues which
a user might have missed to destroy due to a crash or any other
programming error.

Cc: Alex Deucher 
Cc: Christian Koenig 
Suggested-by: Bas Nieuwenhuizen 
Signed-off-by: Bas Nieuwenhuizen 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 31 ---
  1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index 61064266c4f8..6e32e2854a58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -57,12 +57,23 @@ amdgpu_userqueue_get_doorbell_index(struct 
amdgpu_userq_mgr *uq_mgr,

  return index;
  }
  +static void
+amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ int queue_id)
+{
+    const struct amdgpu_userq_funcs *uq_funcs = 
uq_mgr->userq_funcs[queue->queue_type];

+
+    uq_funcs->mqd_destroy(uq_mgr, queue);
+    idr_remove(&uq_mgr->userq_idr, queue_id);
+    kfree(queue);
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
  struct amdgpu_fpriv *fpriv = filp->driver_priv;
  struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
-    const struct amdgpu_userq_funcs *uq_funcs;
  struct amdgpu_usermode_queue *queue;
    mutex_lock(&uq_mgr->userq_mutex);
@@ -73,11 +84,8 @@ amdgpu_userqueue_destroy(struct drm_file *filp, 
int queue_id)

  mutex_unlock(&uq_mgr->userq_mutex);
  return -EINVAL;
  }
-    uq_funcs = uq_mgr->userq_funcs[queue->queue_type];
-    uq_funcs->mqd_destroy(uq_mgr, queue);
-    idr_remove(&uq_mgr->userq_idr, queue_id);
-    kfree(queue);
  +    amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
  mutex_unlock(&uq_mgr->userq_mutex);
  return 0;
  }
@@ -193,8 +201,21 @@ int amdgpu_userq_mgr_init(struct 
amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi

  return 0;
  }
  +static int amdgpu_userqueue_cleanup_residue(int queue_id, void 
*ptr, void *data)

+{
+    struct amdgpu_userq_mgr *uq_mgr = data;
+    struct amdgpu_usermode_queue *queue = ptr;
+
+    amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
+    return 0;
+}
+
  void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
  {
+    idr_for_each(&userq_mgr->userq_idr,
+ amdgpu_userqueue_cleanup_residue,
+ userq_mgr);
+


Better use idr_for_each_entry() here which avoids the mid layer function.

Apart from that it would be nice to have to merge this patch into the 
original one adding the user queues, but really only nice to have.


Noted, will check that out.

- Shashank



Christian.


idr_destroy(&userq_mgr->userq_idr);
  mutex_destroy(&userq_mgr->userq_mutex);
  }
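With idr_for_each_entry() the fini path could look like this sketch,
dropping the mid-layer callback as suggested:

	void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
	{
		struct amdgpu_usermode_queue *queue;
		int queue_id;

		idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id)
			amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id);

		idr_destroy(&userq_mgr->userq_idr);
		mutex_destroy(&userq_mgr->userq_mutex);
	}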




Re: [PATCH v5 08/10] drm/amdgpu: generate doorbell index for userqueue

2023-07-07 Thread Shashank Sharma



On 07/07/2023 09:15, Christian König wrote:

Am 06.07.23 um 14:36 schrieb Shashank Sharma:

The userspace sends us the doorbell object and the relative doorbell
index in the object to be used for the usermode queue, but the FW
expects the absolute doorbell index on the PCI BAR in the MQD. This
patch adds a function to convert this relative doorbell index to
absolute doorbell index.

This patch is dependent on the doorbell manager series:
Link: https://patchwork.freedesktop.org/series/115802/

V5: Fix the db object reference leak (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 34 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c    |  1 +
  2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

index bb774144c372..61064266c4f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -32,6 +32,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr 
*uq_mgr, int qid)

  return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static uint64_t
+amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ struct drm_file *filp,
+ uint32_t doorbell_offset)
+{
+    struct drm_gem_object *gobj;
+    struct amdgpu_bo *db_bo;
+    uint64_t index;
+
+    gobj = drm_gem_object_lookup(filp, queue->doorbell_handle);
+    if (gobj == NULL) {
+    DRM_ERROR("Can't find GEM object for doorbell\n");
+    return -EINVAL;
+    }
+
+    db_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+    drm_gem_object_put(gobj);
+
+    index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_bo, 
doorbell_offset);


This can only be done with the doorbell BO locked and as soon as you 
unlock it the value becomes invalid unless you pin the BO.


Which means I need to use create_bo_kernel() for doorbell BO's or 
specifically pin it while creating it ?


- Shashank


Regards,
Christian.


+    amdgpu_bo_unref(&db_bo);
+    DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index);
+    return index;
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
@@ -64,6 +89,7 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
  const struct amdgpu_userq_funcs *uq_funcs;
  struct amdgpu_usermode_queue *queue;
+    uint64_t index;
  int qid, r = 0;
    mutex_lock(&uq_mgr->userq_mutex);
@@ -87,6 +113,14 @@ amdgpu_userqueue_create(struct drm_file *filp, 
union drm_amdgpu_userq *args)

  queue->flags = args->in.flags;
  queue->vm = &fpriv->vm;
  +    /* Convert relative doorbell offset into absolute doorbell 
index */
+    index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, 
args->in.doorbell_offset);

+    if (index == (uint64_t)-EINVAL) {
+    DRM_ERROR("Failed to get doorbell for queue\n");
+    goto unlock;
+    }
+    queue->doorbell_index = index;
+
  r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
  if (r) {
  DRM_ERROR("Failed to create Queue\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index afaeecb9940a..8edb020683a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6719,6 +6719,7 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  userq_props.queue_size = mqd_user.queue_size;
  userq_props.hqd_base_gpu_addr = mqd_user.queue_va;
  userq_props.mqd_gpu_addr = queue->mqd.gpu_addr;
+    userq_props.doorbell_index = queue->doorbell_index;
  userq_props.use_doorbell = true;
  r = mqd_gfx_generic->init_mqd(adev, (void *)queue->mqd.cpu_ptr, &userq_props);




Re: [PATCH] drm/amdgpu: Always emit GDS switch when GDS/GWS/OA is used

2023-07-07 Thread Friedrich Vock

Hi Christian,

On 07.07.23 08:56, Christian König wrote:



Am 07.07.23 um 08:28 schrieb Friedrich Vock:

During gfxoff, the per-VMID GDS registers are reset and not restored
afterwards.


Hui? Since when? Those registers should be part of the saved ones.

Have you found that by observation?


yes. I tested this on my RX 6700 XT and the Steam Deck (Vangogh). In the
bug report I linked, a test program using GWS I developed hangs because
of this.

The hang occurs as soon as the kernel re-uses a VMID on which GWS was
already used once. In the hung state, inspecting the per-VMID GWS
registers shows that the values have been reset to 0.
The hang does not occur when gfxoff is disabled.

Even without causing hangs, you can confirm the behaviour by doing the
following (sketched in the snippet after this list):
1. Disable gfxoff.
2. Set some GWS registers.
3. Enable gfxoff and wait a bit.
4. Disable gfxoff and read the registers again. The GWS registers have
been reset.

I performed this test for the GDS_BASE/SIZE registers and it seems these
aren't affected, so it's only GWS that is buggy here.
I should probably make a v2 that combines the behaviour before this
patch for GDS and OA, and the patched behaviour for GWS.
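
A sketch of that combined check (illustrative only, not part of this
patch): GDS/OA keep the old change-based test, while any GWS use forces
a switch because the GWS registers are lost across gfxoff.

static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
					  struct amdgpu_job *job)
{
	return id->gds_base != job->gds_base ||
	       id->gds_size != job->gds_size ||
	       id->oa_base != job->oa_base ||
	       id->oa_size != job->oa_size ||
	       job->gws_size; /* GWS may have been reset by gfxoff */
}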

I'm not aware of userspace using GWS (yet, I had some ideas for using it
in RADV which is what I've been writing these tests for),
so perhaps the Cc to stable can also be omitted.

Thanks,
Friedrich



Thanks,
Christian.



  The kernel needs to emit a GDS switch to manually update the
GWS registers in this case. Since gfxoff can happen between any two
submissions and the kernel has no way of knowing, emit the GDS switch
before every submission.

Fixes: 56b0989e29 ("drm/amdgpu: fix GDS/GWS/OA switch handling")
Cc: stable@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2530
Signed-off-by: Friedrich Vock 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 22 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 --
  3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..de73797e9279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,24 +165,17 @@ bool amdgpu_vmid_had_gpu_reset(struct
amdgpu_device *adev,
  atomic_read(&adev->gpu_reset_counter);
  }

-/* Check if we need to switch to another set of resources */
-static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
-  struct amdgpu_job *job)
-{
-    return id->gds_base != job->gds_base ||
-    id->gds_size != job->gds_size ||
-    id->gws_base != job->gws_base ||
-    id->gws_size != job->gws_size ||
-    id->oa_base != job->oa_base ||
-    id->oa_size != job->oa_size;
-}
-
  /* Check if the id is compatible with the job */
  static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
 struct amdgpu_job *job)
  {
  return  id->pd_gpu_addr == job->vm_pd_addr &&
-    !amdgpu_vmid_gds_switch_needed(id, job);
+    id->gds_base == job->gds_base &&
+    id->gds_size == job->gds_size &&
+    id->gws_base == job->gws_base &&
+    id->gws_size == job->gws_size &&
+    id->oa_base == job->oa_base &&
+    id->oa_size == job->oa_size;
  }

  /**
@@ -434,7 +427,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct
amdgpu_ring *ring,
  list_move_tail(&id->list, &id_mgr->ids_lru);
  }

-    job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
  if (job->vm_needs_flush) {
  id->flushed_updates = amdgpu_vm_tlb_seq(vm);
  dma_fence_put(id->last_flush);
@@ -503,7 +495,7 @@ void amdgpu_vmid_free_reserved(struct
amdgpu_device *adev,
   * @vmhub: vmhub type
   * @vmid: vmid number to use
   *
- * Reset saved GDW, GWS and OA to force switch on next flush.
+ * Reset saved GDS, GWS and OA data.
   */
  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
 unsigned vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..2898508b1ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -53,7 +53,6 @@ struct amdgpu_job {
  uint32_t    preamble_status;
  uint32_t    preemption_status;
  bool    vm_needs_flush;
-    bool    gds_switch_needed;
  bool    spm_update_needed;
  uint64_t    vm_pd_addr;
  unsigned    vmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 291977b93b1d..61856040cae2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -557,6 +557,12 @@ void amdgpu_vm_check_compute_bug(struct
amdgpu_device *adev)
  }
  }

+/* Check if the job needs a GDS switch */
+static bool amdgpu_vm_need_gds_switch(struct amdgpu_job *job)
+{
+   return job->gds_size || job->gws_size || job->oa_size;
+}

Re: [PATCH v5 04/10] drm/amdgpu: create GFX-gen11 usermode queue

2023-07-07 Thread Christian König




Am 06.07.23 um 14:35 schrieb Shashank Sharma:

A Memory queue descriptor (MQD) of a userqueue defines it in
the hw's context. As MQD format can vary between different
graphics IPs, we need gfx GEN specific handlers to create MQDs.

This patch:
- Introduces MQD handler functions for the usermode queues.
- Adds new functions to create and destroy userqueue MQD for
   GFX-GEN-11 IP

V1: Worked on review comments from Alex:
 - Make MQD functions GEN and IP specific

V2: Worked on review comments from Alex:
 - Reuse the existing adev->mqd[ip] for MQD creation
 - Formatting and arrangement of code

V3:
 - Integration with doorbell manager

V4: Review comments addressed:
 - Do not create a new file for userq, reuse gfx_v11_0.c (Alex)
 - Align name of structure members (Luben)
 - Don't break up the Cc tag list and the Sob tag list in commit
   message (Luben)
V5:
- No need to reserve the bo for MQD (Christian).
- Some more changes to support IP specific MQD creation.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 16 
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c| 73 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  7 ++
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index e37b5da5a0d0..bb774144c372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -134,12 +134,28 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
return r;
  }
  
+extern const struct amdgpu_userq_funcs userq_gfx_v11_funcs;

+
+static void
+amdgpu_userqueue_setup_gfx(struct amdgpu_userq_mgr *uq_mgr)
+{
+   int maj;
+   struct amdgpu_device *adev = uq_mgr->adev;
+   uint32_t version = adev->ip_versions[GC_HWIP][0];
+
+   /* We support usermode queue only for GFX V11 as of now */
+   maj = IP_VERSION_MAJ(version);
+   if (maj == 11)
	uq_mgr->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_gfx_v11_funcs;
+}
+
  int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct 
amdgpu_device *adev)
  {
	mutex_init(&userq_mgr->userq_mutex);
	idr_init_base(&userq_mgr->userq_idr, 1);
userq_mgr->adev = adev;
  
+	amdgpu_userqueue_setup_gfx(userq_mgr);

return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index c4940b6ea1c4..e76e1b86b434 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -30,6 +30,7 @@
  #include "amdgpu_psp.h"
  #include "amdgpu_smu.h"
  #include "amdgpu_atomfirmware.h"
+#include "amdgpu_userqueue.h"
  #include "imu_v11_0.h"
  #include "soc21.h"
  #include "nvd.h"
@@ -6486,3 +6487,75 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
  };
+
+static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args_in,
+ struct amdgpu_usermode_queue *queue)
+{
+   struct amdgpu_device *adev = uq_mgr->adev;
+   struct amdgpu_mqd *mqd_gfx_generic = &adev->mqds[AMDGPU_HW_IP_GFX];
+   struct drm_amdgpu_userq_mqd_gfx_v11_0 mqd_user;
+   struct amdgpu_mqd_prop userq_props;
+   int r;
+
+   /* Incoming MQD parameters from userspace to be saved here */
+   memset(&mqd_user, 0, sizeof(mqd_user));
+
+   /* Structure to initialize MQD for userqueue using generic MQD init function */
+   memset(&userq_props, 0, sizeof(userq_props));
+
+   if (args_in->mqd_size != sizeof(struct drm_amdgpu_userq_mqd_gfx_v11_0)) {
+   DRM_ERROR("MQD size mismatch\n");
+   return -EINVAL;
+   }
+
+   if (copy_from_user(&mqd_user, u64_to_user_ptr(args_in->mqd), args_in->mqd_size)) {
+   DRM_ERROR("Failed to get user MQD\n");
+   return -EFAULT;
+   }


Sorry, I've just seen that now. Please don't have a copy_from_user() in 
the backend!


This is pure front end stuff which we shouldn't do in hw generation 
specific code.


Regards,
Christian.
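
A sketch of moving the copy into the IOCTL front end
(amdgpu_userqueue_create()), assuming memdup_user(); the reworked
mqd_create() signature taking a kernel pointer is hypothetical:

	void *mqd_kernel;

	if (!args->in.mqd || !args->in.mqd_size)
		return -EINVAL;

	/* Front end copies the user MQD once into kernel memory */
	mqd_kernel = memdup_user(u64_to_user_ptr(args->in.mqd),
				 args->in.mqd_size);
	if (IS_ERR(mqd_kernel))
		return PTR_ERR(mqd_kernel);

	/* Backend only validates the size and consumes kernel memory */
	r = uq_funcs->mqd_create(uq_mgr, mqd_kernel, args->in.mqd_size, queue);
	kfree(mqd_kernel);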


+
+   /* Create BO for actual Userqueue MQD now */
+   r = amdgpu_bo_create_kernel(adev, mqd_gfx_generic->mqd_size, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &queue->mqd.obj,
+   &queue->mqd.gpu_addr,
+   &queue->mqd.cpu_ptr);
+   if (r) {
+   DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
+   return -ENOMEM;
+   }
+   memset(queue->mqd.cpu_ptr, 0, mqd_gfx_generic->mqd_size);
+
+   /* Initialize the MQD BO with user given values */
+   userq_props.wptr_gpu_addr = mqd_user.wptr_va;
+   userq_props.rptr_gpu_addr = 

Re: [PATCH v5 09/10] drm/amdgpu: cleanup leftover queues

2023-07-07 Thread Christian König




Am 06.07.23 um 14:36 schrieb Shashank Sharma:

This patch adds code to clean up any leftover userqueues which
a user might have failed to destroy due to a crash or any other
programming error.

Cc: Alex Deucher 
Cc: Christian Koenig 
Suggested-by: Bas Nieuwenhuizen 
Signed-off-by: Bas Nieuwenhuizen 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 31 ---
  1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 61064266c4f8..6e32e2854a58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -57,12 +57,23 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr 
*uq_mgr,
return index;
  }
  
+static void

+amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+struct amdgpu_usermode_queue *queue,
+int queue_id)
+{
+   const struct amdgpu_userq_funcs *uq_funcs = uq_mgr->userq_funcs[queue->queue_type];
+
+   uq_funcs->mqd_destroy(uq_mgr, queue);
+   idr_remove(&uq_mgr->userq_idr, queue_id);
+   kfree(queue);
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
-   const struct amdgpu_userq_funcs *uq_funcs;
struct amdgpu_usermode_queue *queue;
  
  	mutex_lock(&uq_mgr->userq_mutex);

@@ -73,11 +84,8 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
	mutex_unlock(&uq_mgr->userq_mutex);
return -EINVAL;
}
-   uq_funcs = uq_mgr->userq_funcs[queue->queue_type];
-   uq_funcs->mqd_destroy(uq_mgr, queue);
-   idr_remove(_mgr->userq_idr, queue_id);
-   kfree(queue);
  
+	amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);

	mutex_unlock(&uq_mgr->userq_mutex);
return 0;
  }
@@ -193,8 +201,21 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr 
*userq_mgr, struct amdgpu_devi
return 0;
  }
  
+static int amdgpu_userqueue_cleanup_residue(int queue_id, void *ptr, void *data)

+{
+   struct amdgpu_userq_mgr *uq_mgr = data;
+   struct amdgpu_usermode_queue *queue = ptr;
+
+   amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
+   return 0;
+}
+
  void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
  {
+   idr_for_each(&userq_mgr->userq_idr,
+		 amdgpu_userqueue_cleanup_residue,
+		 userq_mgr);
+


Better use idr_for_each_entry() here which avoids the mid layer function.
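
For reference, a minimal sketch using the helpers from this patch
(illustrative only):

void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;

	/* Walk the IDR inline; no mid-layer callback needed */
	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id)
		amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id);

	idr_destroy(&userq_mgr->userq_idr);
	mutex_destroy(&userq_mgr->userq_mutex);
}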

Apart from that it would be nice to have to merge this patch into the 
original one adding the user queues, but really only nice to have.


Christian.


	idr_destroy(&userq_mgr->userq_idr);
	mutex_destroy(&userq_mgr->userq_mutex);
  }




Re: [PATCH v5 08/10] drm/amdgpu: generate doorbell index for userqueue

2023-07-07 Thread Christian König

Am 06.07.23 um 14:36 schrieb Shashank Sharma:

The userspace sends us the doorbell object and the relative doorbell
index in the object to be used for the usermode queue, but the FW
expects the absolute doorbell index on the PCI BAR in the MQD. This
patch adds a function to convert this relative doorbell index to
absolute doorbell index.

This patch is dependent on the doorbell manager series:
Link: https://patchwork.freedesktop.org/series/115802/

V5: Fix the db object reference leak (Christian)

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 34 +++
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  1 +
  2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index bb774144c372..61064266c4f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -32,6 +32,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int 
qid)
	return idr_find(&uq_mgr->userq_idr, qid);
  }
  
+static uint64_t

+amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+struct amdgpu_usermode_queue *queue,
+struct drm_file *filp,
+uint32_t doorbell_offset)
+{
+   struct drm_gem_object *gobj;
+   struct amdgpu_bo *db_bo;
+   uint64_t index;
+
+   gobj = drm_gem_object_lookup(filp, queue->doorbell_handle);
+   if (gobj == NULL) {
+   DRM_ERROR("Can't find GEM object for doorbell\n");
+   return -EINVAL;
+   }
+
+   db_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+   drm_gem_object_put(gobj);
+
+   index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_bo, doorbell_offset);


This can only be done with the doorbell BO locked and as soon as you 
unlock it the value becomes invalid unless you pin the BO.


Regards,
Christian.


+   amdgpu_bo_unref(&db_bo);
+   DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index);
+   return index;
+}
+
  static int
  amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
  {
@@ -64,6 +89,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
const struct amdgpu_userq_funcs *uq_funcs;
struct amdgpu_usermode_queue *queue;
+   uint64_t index;
int qid, r = 0;
  
  	mutex_lock(&uq_mgr->userq_mutex);

@@ -87,6 +113,14 @@ amdgpu_userqueue_create(struct drm_file *filp, union 
drm_amdgpu_userq *args)
queue->flags = args->in.flags;
	queue->vm = &fpriv->vm;
  
+	/* Convert relative doorbell offset into absolute doorbell index */

+   index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, args->in.doorbell_offset);
+   if (index == (uint64_t)-EINVAL) {
+   DRM_ERROR("Failed to get doorbell for queue\n");
+   goto unlock;
+   }
+   queue->doorbell_index = index;
+
	r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
if (r) {
DRM_ERROR("Failed to create Queue\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index afaeecb9940a..8edb020683a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6719,6 +6719,7 @@ static int gfx_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,
userq_props.queue_size = mqd_user.queue_size;
userq_props.hqd_base_gpu_addr = mqd_user.queue_va;
userq_props.mqd_gpu_addr = queue->mqd.gpu_addr;
+   userq_props.doorbell_index = queue->doorbell_index;
userq_props.use_doorbell = true;
  
  	r = mqd_gfx_generic->init_mqd(adev, (void *)queue->mqd.cpu_ptr, &userq_props);




Re: [PATCH v4] drm/amdgpu:update kernel vcn ring test

2023-07-07 Thread Christian König




Am 06.07.23 um 16:47 schrieb Saleemkhan Jamadar:

add session context buffer to decoder ring test.

v4 - fix data type, explain the IB size change (Christian)
v3 - indentation and v2 change corrections (Christian)
v2 - put the buffer at the end of the IB (Christian)

Signed-off-by: Saleemkhan Jamadar 
Acked-by: Leo Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 11 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  5 -
  2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2d94f1b63bd6..9bdfe665f603 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -573,7 +573,8 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring 
*ring, uint32_t hand
int r, i;
  
  	memset(ib, 0, sizeof(*ib));

-   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   /* 34 pages : 128KiB  session context buffer size and 8KiB ib msg */
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
AMDGPU_IB_POOL_DIRECT,
ib);
if (r)
@@ -608,7 +609,8 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct 
amdgpu_ring *ring, uint32_t han
int r, i;
  
  	memset(ib, 0, sizeof(*ib));

-   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+   /* 34 pages : 128KB  session context buffer size and 8KB ib msg */
+   r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 34,
AMDGPU_IB_POOL_DIRECT,
ib);


One more question here: Does the create and destroy message need to 
point to the same session context buffer or is it ok that we use a 
separate dummy for both?


Either way we should probably clear the context buffer with zeros.
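
A sketch of that zeroing in the create/destroy helpers, assuming the
layout from this patch where the 34-page IB allocation holds the 8KiB
message followed by the 128KiB session context buffer:

	/* After amdgpu_ib_get() succeeds: clear the whole allocation so
	 * both the message area and the session context buffer start
	 * out zeroed. */
	memset(ib->ptr, 0, AMDGPU_GPU_PAGE_SIZE * 34);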

Apart from that this now looks good to me,
Christian.


if (r)
@@ -700,6 +702,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring 
*ring,
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+   uint64_t session_ctx_buf_gaddr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr + 8192);
bool sq = amdgpu_vcn_using_unified_queue(ring);
uint32_t *ib_checksum;
uint32_t ib_pack_in_dw;
@@ -730,6 +733,10 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring 
*ring,
ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
  
+	decode_buffer->valid_buf_flag |=
+		cpu_to_le32(AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER);
+	decode_buffer->session_context_buffer_address_hi =
+		upper_32_bits(session_ctx_buf_gaddr);
+	decode_buffer->session_context_buffer_address_lo =
+		lower_32_bits(session_ctx_buf_gaddr);
	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index f1397ef66fd7..2df43cd76c10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -166,6 +166,7 @@
  
  #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER	0x0001

  #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER0x0001
+#define AMDGPU_VCN_CMD_FLAG_SESSION_CONTEXT_BUFFER 0x0010
  
  #define VCN_CODEC_DISABLE_MASK_AV1  (1 << 0)

  #define VCN_CODEC_DISABLE_MASK_VP9  (1 << 1)
@@ -357,7 +358,9 @@ struct amdgpu_vcn_decode_buffer {
uint32_t valid_buf_flag;
uint32_t msg_buffer_address_hi;
uint32_t msg_buffer_address_lo;
-   uint32_t pad[30];
+   uint32_t session_context_buffer_address_hi;
+   uint32_t session_context_buffer_address_lo;
+   uint32_t pad[28];
  };
  
  #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80




Re: [PATCH] drm/amdgpu: Always emit GDS switch when GDS/GWS/OA is used

2023-07-07 Thread Christian König




Am 07.07.23 um 08:28 schrieb Friedrich Vock:

During gfxoff, the per-VMID GDS registers are reset and not restored
afterwards.


Hui? Since when? Those registers should be part of the saved ones.

Have you found that by observation?

Thanks,
Christian.



  The kernel needs to emit a GDS switch to manually update the
GWS registers in this case. Since gfxoff can happen between any two
submissions and the kernel has no way of knowing, emit the GDS switch
before every submission.

Fixes: 56b0989e29 ("drm/amdgpu: fix GDS/GWS/OA switch handling")
Cc: stable@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2530
Signed-off-by: Friedrich Vock 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 22 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 --
  3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..de73797e9279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,24 +165,17 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
	atomic_read(&adev->gpu_reset_counter);
  }

-/* Check if we need to switch to another set of resources */
-static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
- struct amdgpu_job *job)
-{
-   return id->gds_base != job->gds_base ||
-   id->gds_size != job->gds_size ||
-   id->gws_base != job->gws_base ||
-   id->gws_size != job->gws_size ||
-   id->oa_base != job->oa_base ||
-   id->oa_size != job->oa_size;
-}
-
  /* Check if the id is compatible with the job */
  static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
   struct amdgpu_job *job)
  {
return  id->pd_gpu_addr == job->vm_pd_addr &&
-   !amdgpu_vmid_gds_switch_needed(id, job);
+   id->gds_base == job->gds_base &&
+   id->gds_size == job->gds_size &&
+   id->gws_base == job->gws_base &&
+   id->gws_size == job->gws_size &&
+   id->oa_base == job->oa_base &&
+   id->oa_size == job->oa_size;
  }

  /**
@@ -434,7 +427,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
	list_move_tail(&id->list, &id_mgr->ids_lru);
}

-   job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
if (job->vm_needs_flush) {
id->flushed_updates = amdgpu_vm_tlb_seq(vm);
dma_fence_put(id->last_flush);
@@ -503,7 +495,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
   * @vmhub: vmhub type
   * @vmid: vmid number to use
   *
- * Reset saved GDW, GWS and OA to force switch on next flush.
+ * Reset saved GDS, GWS and OA data.
   */
  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
   unsigned vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..2898508b1ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -53,7 +53,6 @@ struct amdgpu_job {
uint32_tpreamble_status;
uint32_tpreemption_status;
boolvm_needs_flush;
-   boolgds_switch_needed;
boolspm_update_needed;
uint64_tvm_pd_addr;
unsignedvmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 291977b93b1d..61856040cae2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -557,6 +557,12 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev)
}
  }

+/* Check if the job needs a GDS switch */
+static bool amdgpu_vm_need_gds_switch(struct amdgpu_job *job)
+{
+   return job->gds_size || job->gws_size || job->oa_size;
+}
+
  /**
   * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
   *
@@ -579,7 +585,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
if (job->vm_needs_flush || ring->has_compute_vm_bug)
return true;

-   if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+   if (ring->funcs->emit_gds_switch && amdgpu_vm_need_gds_switch(job))
return true;

	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
@@ -609,7 +615,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
bool spm_update_needed = job->spm_update_needed;
bool gds_switch_needed = ring->funcs->emit_gds_switch &&
-   job->gds_switch_needed;
+   amdgpu_vm_need_gds_switch(job);

[PATCH] drm/amdgpu: Always emit GDS switch when GDS/GWS/OA is used

2023-07-07 Thread Friedrich Vock
During gfxoff, the per-VMID GDS registers are reset and not restored
afterwards. The kernel needs to emit a GDS switch to manually update the
GWS registers in this case. Since gfxoff can happen between any two
submissions and the kernel has no way of knowing, emit the GDS switch
before every submission.

Fixes: 56b0989e29 ("drm/amdgpu: fix GDS/GWS/OA switch handling")
Cc: stable@vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2530
Signed-off-by: Friedrich Vock 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 22 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 --
 3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..de73797e9279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,24 +165,17 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
	atomic_read(&adev->gpu_reset_counter);
 }

-/* Check if we need to switch to another set of resources */
-static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
- struct amdgpu_job *job)
-{
-   return id->gds_base != job->gds_base ||
-   id->gds_size != job->gds_size ||
-   id->gws_base != job->gws_base ||
-   id->gws_size != job->gws_size ||
-   id->oa_base != job->oa_base ||
-   id->oa_size != job->oa_size;
-}
-
 /* Check if the id is compatible with the job */
 static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
   struct amdgpu_job *job)
 {
return  id->pd_gpu_addr == job->vm_pd_addr &&
-   !amdgpu_vmid_gds_switch_needed(id, job);
+   id->gds_base == job->gds_base &&
+   id->gds_size == job->gds_size &&
+   id->gws_base == job->gws_base &&
+   id->gws_size == job->gws_size &&
+   id->oa_base == job->oa_base &&
+   id->oa_size == job->oa_size;
 }

 /**
@@ -434,7 +427,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring,
	list_move_tail(&id->list, &id_mgr->ids_lru);
}

-   job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
if (job->vm_needs_flush) {
id->flushed_updates = amdgpu_vm_tlb_seq(vm);
dma_fence_put(id->last_flush);
@@ -503,7 +495,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
  * @vmhub: vmhub type
  * @vmid: vmid number to use
  *
- * Reset saved GDW, GWS and OA to force switch on next flush.
+ * Reset saved GDS, GWS and OA data.
  */
 void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
   unsigned vmid)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..2898508b1ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -53,7 +53,6 @@ struct amdgpu_job {
uint32_tpreamble_status;
uint32_tpreemption_status;
boolvm_needs_flush;
-   boolgds_switch_needed;
boolspm_update_needed;
uint64_tvm_pd_addr;
unsignedvmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 291977b93b1d..61856040cae2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -557,6 +557,12 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device 
*adev)
}
 }

+/* Check if the job needs a GDS switch */
+static bool amdgpu_vm_need_gds_switch(struct amdgpu_job *job)
+{
+   return job->gds_size || job->gws_size || job->oa_size;
+}
+
 /**
  * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
  *
@@ -579,7 +585,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
if (job->vm_needs_flush || ring->has_compute_vm_bug)
return true;

-   if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+   if (ring->funcs->emit_gds_switch && amdgpu_vm_need_gds_switch(job))
return true;

	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
@@ -609,7 +615,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
bool spm_update_needed = job->spm_update_needed;
bool gds_switch_needed = ring->funcs->emit_gds_switch &&
-   job->gds_switch_needed;
+   amdgpu_vm_need_gds_switch(job);
bool vm_flush_needed = job->vm_needs_flush;
struct dma_fence *fence = NULL;
bool pasid_mapping_needed = false;
--
2.41.0