Since gfx 9.4.3, the HW calculates the accumulated activity counter
per queue in the registers sdmax_rlcx_utilization_hi/lo. CPFW adds it to
the SDMA MQD for save/restore, and KFD reads it from there. gfx 9.4.2
still reads the counter from memory at rptr+8.

v2: read dynamic counter directly from utilization register

Signed-off-by: Eric Huang <[email protected]>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   | 51 ++++++++++++++++++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 13 ++++-
 .../include/asic_reg/sdma/sdma_4_4_2_offset.h |  4 ++
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +
 drivers/gpu/drm/amd/include/v9_structs.h      |  4 +-
 6 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index f46c59118304..16bad244c091 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -35,6 +35,8 @@
 #include "sdma/sdma_4_4_2_sh_mask.h"
 #include <uapi/linux/kfd_ioctl.h>
 
+#define SDMA_QUEUES_NUM_PER_ENG        8 /* RLC queues probed per SDMA engine */
+
 static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
 {
        return (struct v9_sdma_mqd *)mqd;
@@ -584,6 +586,52 @@ static uint32_t kgd_v9_4_3_ptl_ctrl(struct amdgpu_device 
*adev,
                        ptl_state, fmt1, fmt2);
 }
 
+/* Read the accumulated SDMA activity counter of the queue described by @mqd
+ * into @val. The live utilization registers are used only when a HW queue
+ * whose ring base matches the MQD is found and enabled; otherwise the copy
+ * in the MQD is returned. Returns 0, or -EINVAL if @mqd or @val is NULL.
+ */
+static int kgd_gfx_v9_4_3_hqd_sdma_get_counter(struct amdgpu_device *adev,
+                                       void *mqd, uint64_t *val)
+{
+       struct v9_sdma_mqd *m = get_sdma_mqd(mqd);
+       uint32_t reg_offset, engine_id, queue_id;
+
+       if (!m || !val)
+               return -EINVAL;
+
+       for (engine_id = 0; engine_id < adev->sdma.num_instances; engine_id++) {
+               for (queue_id = 0; queue_id < SDMA_QUEUES_NUM_PER_ENG; queue_id++) {
+                       reg_offset = get_sdma_rlc_reg_offset(adev,
+                                               engine_id, queue_id);
+                       if (m->sdmax_rlcx_rb_base !=
+                           RREG32(reg_offset + regSDMA_RLC0_RB_BASE) ||
+                           m->sdmax_rlcx_rb_base_hi !=
+                           RREG32(reg_offset + regSDMA_RLC0_RB_BASE_HI))
+                               continue;
+
+                       /* Matching HW queue: use the live registers only
+                        * while it is enabled, else fall back to the MQD.
+                        */
+                       if (!(RREG32(reg_offset + regSDMA_RLC0_RB_CNTL) &
+                             SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK))
+                               goto read_mqd;
+
+                       *val = (uint64_t)RREG32(reg_offset +
+                                       regSDMA_RLC0_UTILIZATION_HI) << 32 |
+                               RREG32(reg_offset + regSDMA_RLC0_UTILIZATION_LO);
+                       return 0;
+               }
+       }
+
+read_mqd:
+       /* No enabled HW queue is mapped to this MQD; use the saved copy. */
+       *val = (uint64_t)m->sdmax_rlcx_utilization_hi << 32 |
+               m->sdmax_rlcx_utilization_lo;
+
+       return 0;
+}
+
 const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
        .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
@@ -623,5 +671,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
        .trigger_pc_sample_trap = kgd_v9_4_3_trigger_pc_sample_trap,
        .override_core_cg = kgd_gfx_v9_4_3_override_core_cg,
        .setup_stoch_sampling = kgd_v9_4_3_setup_stoch_sampling,
-       .ptl_ctrl = kgd_v9_4_3_ptl_ctrl
+       .ptl_ctrl = kgd_v9_4_3_ptl_ctrl,
+       .hqd_sdma_get_counter = kgd_gfx_v9_4_3_hqd_sdma_get_counter
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b934863312d0..a65161659f74 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1067,8 +1067,15 @@ static int destroy_queue_nocpsch(struct 
device_queue_manager *dqm,
        /* Get the SDMA queue stats */
        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
-               retval = read_sdma_queue_counter((uint64_t __user 
*)q->properties.read_ptr,
-                                                       &sdma_val);
+               if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)))
+                       retval = read_sdma_queue_counter(
+                                       (uint64_t __user 
*)q->properties.read_ptr,
+                                       &sdma_val);
+               else
+                       retval = dqm->dev->kfd2kgd->hqd_sdma_get_counter ?
+                                dqm->dev->kfd2kgd->hqd_sdma_get_counter(
+                                       dqm->dev->adev, q->mqd, &sdma_val) :
+                                0;
                if (retval)
                        dev_err(dev, "Failed to read SDMA queue counter for 
queue: %d\n",
                                q->properties.queue_id);
@@ -2728,8 +2735,16 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
        /* Get the SDMA queue stats */
        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
-               retval = read_sdma_queue_counter((uint64_t __user 
*)q->properties.read_ptr,
-                                                       &sdma_val);
+               if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))
+                       retval = read_sdma_queue_counter(
+                                       (uint64_t __user 
*)q->properties.read_ptr,
+                                       &sdma_val);
+               else
+                       retval = dqm->dev->kfd2kgd->hqd_sdma_get_counter ?
+                                dqm->dev->kfd2kgd->hqd_sdma_get_counter(
+                                       dqm->dev->adev, q->mqd, &sdma_val) :
+                                0;
+
                if (retval)
                        dev_err(dev, "Failed to read SDMA queue counter for 
queue: %d\n",
                                q->properties.queue_id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 0be2fd04e6d0..911f974e6bf5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -95,6 +95,7 @@ struct kfd_sdma_activity_handler_workarea {
 
 struct temp_sdma_queue_list {
        uint64_t __user *rptr;
+       void *mqd;
        uint64_t sdma_val;
        unsigned int queue_id;
        struct list_head list;
@@ -165,6 +166,7 @@ static void kfd_sdma_activity_worker(struct work_struct 
*work)
 
                INIT_LIST_HEAD(&sdma_q->list);
                sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
+               sdma_q->mqd = q->mqd;
                sdma_q->queue_id = q->properties.queue_id;
                list_add_tail(&sdma_q->list, &sdma_q_list.list);
        }
@@ -193,7 +195,16 @@ static void kfd_sdma_activity_worker(struct work_struct 
*work)
 
        list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
                val = 0;
-               ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+
+               if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)))
+                       ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+               else
+                       ret = dqm->dev->kfd2kgd->hqd_sdma_get_counter ?
+                             dqm->dev->kfd2kgd->hqd_sdma_get_counter(
+                                       dqm->dev->adev,
+                                       sdma_q->mqd, &val) :
+                             0;
+
                if (ret) {
                        pr_debug("Failed to read SDMA queue active counter for 
queue id: %d",
                                 sdma_q->queue_id);
diff --git a/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h 
b/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h
index ead81aeffd67..8700f8190c7c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h
@@ -493,6 +493,10 @@
 #define regSDMA_RLC0_MIDCMD_DATA10_BASE_IDX                                    
                         0
 #define regSDMA_RLC0_MIDCMD_CNTL                                               
                         0x017b
 #define regSDMA_RLC0_MIDCMD_CNTL_BASE_IDX                                      
                         0
+#define regSDMA_RLC0_UTILIZATION_LO                                            
                        0x017c
+#define regSDMA_RLC0_UTILIZATION_LO_BASE_IDX                                   
                        0
+#define regSDMA_RLC0_UTILIZATION_HI                                            
                        0x017d
+#define regSDMA_RLC0_UTILIZATION_HI_BASE_IDX                                   
                        0
 #define regSDMA_RLC1_RB_CNTL                                                   
                         0x0188
 #define regSDMA_RLC1_RB_CNTL_BASE_IDX                                          
                         0
 #define regSDMA_RLC1_RB_BASE                                                   
                         0x0189
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index d34c869b182f..f3220794c108 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -361,6 +361,8 @@ struct kfd2kgd_calls {
                             uint32_t *ptl_state,
                             enum amdgpu_ptl_fmt *fmt1,
                             enum amdgpu_ptl_fmt *fmt2);
+       int (*hqd_sdma_get_counter)(struct amdgpu_device *adev,
+                                   void *mqd, uint64_t *val);
 };
 
 #endif /* KGD_KFD_INTERFACE_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h 
b/drivers/gpu/drm/amd/include/v9_structs.h
index a2f81b9c38af..e0d387f08576 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -69,8 +69,8 @@ struct v9_sdma_mqd {
        uint32_t sdmax_rlcx_midcmd_cntl;
        uint32_t reserved_42;
        uint32_t reserved_43;
-       uint32_t reserved_44;
-       uint32_t reserved_45;
+       uint32_t sdmax_rlcx_utilization_lo;
+       uint32_t sdmax_rlcx_utilization_hi;
        uint32_t reserved_46;
        uint32_t reserved_47;
        uint32_t reserved_48;
-- 
2.34.1

Reply via email to