Re: [PATCH 12/33] drm/amdgpu: add configurable grace period for unmap queues

2023-05-30 Thread Felix Kuehling



Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
|diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index faf4772ed317..a0cfd57ea84a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -303,6 +303,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
+   .set_grace_period = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -310,6 +311,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+   .set_grace_period = 0,|


|Did you mean set_grace_period_size here? With that fixed, the patch is|

|Reviewed-by: Felix Kuehling 
|


|.query_status_size = sizeof(struct pm4_mes_query_status), 
.release_mem_size = sizeof(struct pm4_mec_release_mem)|


[PATCH 12/33] drm/amdgpu: add configurable grace period for unmap queues

2023-05-25 Thread Jonathan Kim
The HWS schedule allows a grace period for wave completion prior to
preemption for better performance by avoiding CWSR on waves that can
potentially complete quickly. The debugger, on the other hand, will
want to inspect wave status immediately after it actively triggers
preemption (a suspend function to be provided).

To minimize latency between preemption and debugger wave inspection, allow
immediate preemption by setting the grace period to 0.

Note that setting the preemption grace period to 0 will result in an
infinite grace period being set due to a CP FW bug so set it to 1 for now.

v2: add null grace period function pointers to VI packet manager.

Signed-off-by: Jonathan Kim 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 43 
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  6 ++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  8 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 63 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +
 .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 39 +++
 .../drm/amd/amdkfd/kfd_packet_manager_vi.c|  2 +
 .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   | 65 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 ++
 14 files changed, 295 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index a6f98141c29c..b811a0985050 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+   .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index d2918e5c0dea..a62bd0068515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_arcturus_enable_debug_trap,
.disable_debug_trap = kgd_arcturus_disable_debug_trap,
+   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+   .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 240f5006e278..98006c7021dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -803,6 +803,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct 
amdgpu_device *adev,
return 0;
 }
 
+/* kgd_gfx_v10_get_iq_wait_times: Stores the mmCP_IQ_WAIT_TIME2 register value
+ * in *wait_times. NOTE(review): only TIME2 is read; confirm the field list:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry (TODO confirm).
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs (same as gws? TODO).
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+   uint32_t *wait_times)
+
+{
+   *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+   uint32_t wait_times,
+   uint32_t grace_period,
+   uint32_t *reg_offset,
+   uint32_t *reg_data)
+{
+   *reg_data = wait_times;
+
+   /*
+* The CP cannont handle a 0 grace period input and will result in
+* an infinite grace period being set so set to