From: Mukul Joshi <[email protected]>

Currently, we do not turn off retry faults in VM_CONTEXT_CNTL value
when passing it to MES if XNACK is off. This creates a situation where
XNACK is disabled in SQ but enabled in UTCL2, which is not recommended.
Fix this by turning retry faults off/on in both SQ and UTCL2, in the
vm_context_cntl value passed to MES, when XNACK is disabled/enabled.

Suggested-by: Jay Cornwall <[email protected]>
Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Philip Yang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h              |  1 +
 drivers/gpu/drm/amd/amdgpu/mes_v12_1.c               |  5 +----
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c    |  1 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_v12_1.c  | 12 ++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                |  1 +
 5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f99de1592921c..88685c58798e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -254,6 +254,7 @@ struct mes_add_queue_input {
        uint32_t        queue_size;
        uint32_t        exclusively_scheduled;
        uint32_t        sh_mem_config_data;
+       uint32_t        vm_cntx_cntl;
 };
 
 struct mes_remove_queue_input {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 15b467b4aae40..913ce414ca9bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -286,10 +286,7 @@ static int convert_to_mes_queue_type(int queue_type)
 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
                                  struct mes_add_queue_input *input)
 {
-       struct amdgpu_device *adev = mes->adev;
        union MESAPI__ADD_QUEUE mes_add_queue_pkt;
-       struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
-       uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
        int xcc_id = input->xcc_id;
        int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
 
@@ -322,7 +319,7 @@ static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
        mes_add_queue_pkt.queue_type =
                convert_to_mes_queue_type(input->queue_type);
        mes_add_queue_pkt.paging = input->paging;
-       mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+       mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
        mes_add_queue_pkt.gws_base = input->gws_base;
        mes_add_queue_pkt.gws_size = input->gws_size;
        mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 824d73ab5b283..1dbdf9465e590 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -256,6 +256,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
 
        queue_input.exclusively_scheduled = q->properties.is_gws;
        queue_input.sh_mem_config_data = qpd->sh_mem_config;
+       queue_input.vm_cntx_cntl = qpd->vm_cntx_cntl;
 
        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12_1.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12_1.c
index e4ec7190fea14..9e70a5f8a50b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12_1.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12_1.c
@@ -52,8 +52,11 @@ static int update_qpd_v12_1(struct device_queue_manager *dqm,
                         struct qcm_process_device *qpd)
 {
        struct kfd_process_device *pdd;
+       struct amdgpu_device *adev = dqm->dev->adev;
+       struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
 
        pdd = qpd_to_pdd(qpd);
+       qpd->vm_cntx_cntl = hub->vm_cntx_cntl;
 
        /* check if sh_mem_config register already configured */
        if (qpd->sh_mem_config == 0) {
@@ -69,10 +72,15 @@ static int update_qpd_v12_1(struct device_queue_manager 
*dqm,
        }
 
        if (KFD_SUPPORT_XNACK_PER_PROCESS(dqm->dev)) {
-               if (!pdd->process->xnack_enabled)
+               if (!pdd->process->xnack_enabled) {
                        qpd->sh_mem_config |= 1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
-               else
+                       qpd->vm_cntx_cntl &=
+                       ~(1 << 
GCVM_CONTEXT0_CNTL__RETRY_PERMISSION_OR_INVALID_PAGE_FAULT__SHIFT);
+               } else {
                        qpd->sh_mem_config &= ~(1 << 
SH_MEM_CONFIG__RETRY_DISABLE__SHIFT);
+                       qpd->vm_cntx_cntl |=
+                       (1 << 
GCVM_CONTEXT0_CNTL__RETRY_PERMISSION_OR_INVALID_PAGE_FAULT__SHIFT);
+               }
        }
 
        qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7b70f794f3d0c..ebc637c38c04a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -697,6 +697,7 @@ struct qcm_process_device {
        uint32_t num_gws;
        uint32_t num_oac;
        uint32_t sh_hidden_private_base;
+       uint32_t vm_cntx_cntl;
 
        /* CWSR memory */
        struct kgd_mem *cwsr_mem;
-- 
2.51.1

Reply via email to