Add queue remapping to ensure that any waves executing the PC sampling
part of the trap handler are done before kfd_pc_sample_stop returns,
and that no new waves enter that part of the trap handler afterwards.
This avoids race conditions that could lead to use-after-free. Unmapping
and remapping the queues either waits for the waves to drain, or preempts
them with CWSR, which itself executes a trap and waits for previous traps
to finish.

Signed-off-by: James Zhu <james....@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c          |  3 +++
 3 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c0e71543389a..a3f57be63f4f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -3155,6 +3155,17 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
        return debug_map_and_unlock(dqm);
 }
 
+void remap_queue(struct device_queue_manager *dqm,
+                               enum kfd_unmap_queues_filter filter,
+                               uint32_t filter_param,
+                               uint32_t grace_period)
+{      /* Calls execute_queues_cpsch() while holding the DQM lock. */
+       dqm_lock(dqm);
+       if (!dqm->dev->kfd->shared_resources.enable_mes) /* skipped when MES is enabled */
+               execute_queues_cpsch(dqm, filter, filter_param, grace_period);
+       dqm_unlock(dqm); /* NOTE(review): execute_queues_cpsch() result is ignored */
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index cf7e182588f8..f8aae3747a36 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -303,6 +303,11 @@ int debug_lock_and_unmap(struct device_queue_manager *dqm);
 int debug_map_and_unlock(struct device_queue_manager *dqm);
 int debug_refresh_runlist(struct device_queue_manager *dqm);
 
+void remap_queue(struct device_queue_manager *dqm,
+                               enum kfd_unmap_queues_filter filter,
+                               uint32_t filter_param,
+                               uint32_t grace_period);
+
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
        return (pdd->lds_base >> 16) & 0xFF;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
index 02fa481d7457..c9fd5b2a3330 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
@@ -24,6 +24,7 @@
 #include "kfd_priv.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_pc_sampling.h"
+#include "kfd_device_queue_manager.h"
 
 struct supported_pc_sample_info {
        uint32_t ip_version;
@@ -105,6 +106,8 @@ static int kfd_pc_sample_stop(struct kfd_process_device *pdd,
        if (pc_sampling_stop) {
                 kfd_process_set_trap_pc_sampling_flag(&pdd->qpd,
                         pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, false);
+               remap_queue(pdd->dev->dqm,
+                       KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
 
                mutex_lock(&pdd->dev->pcs_data.mutex);
                pdd->dev->pcs_data.hosttrap_entry.base.target_simd = 0;
-- 
2.25.1

Reply via email to