since gfx 9.4.3 HW is calculating accumulated activity counter per-queue in register sdmax_rlcx_utilization_hi/lo, CPFW adds it in sdma MQD for save/restore, KFD will read it from there. gfx 9.4.2 will still keep the way to read from memory at rptr+8.
v2: read dynamic counter directly from utilization register Signed-off-by: Eric Huang <[email protected]> --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 51 ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +++++++-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 13 ++++- .../include/asic_reg/sdma/sdma_4_4_2_offset.h | 4 ++ .../gpu/drm/amd/include/kgd_kfd_interface.h | 2 + drivers/gpu/drm/amd/include/v9_structs.h | 4 +- 6 files changed, 89 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index f46c59118304..16bad244c091 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -35,6 +35,8 @@ #include "sdma/sdma_4_4_2_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#define SDMA_QUEUES_NUM_PER_ENG 8 + static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) { return (struct v9_sdma_mqd *)mqd; @@ -584,6 +586,52 @@ static uint32_t kgd_v9_4_3_ptl_ctrl(struct amdgpu_device *adev, ptl_state, fmt1, fmt2); } +static int kgd_gfx_v9_4_3_hqd_sdma_get_counter(struct amdgpu_device *adev, + void *mqd, uint64_t *val) +{ + struct v9_sdma_mqd *m = get_sdma_mqd(mqd); + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + uint32_t engine_id, queue_id; + uint32_t engines = adev->sdma.num_instances; + uint32_t sdma_rlcx_rb_base, sdma_rlcx_rb_base_hi; + bool found = false; + + if (!m) + return -EINVAL; + + for (engine_id = 0; engine_id < engines && !found; engine_id++) { + for (queue_id = 0; queue_id < SDMA_QUEUES_NUM_PER_ENG; queue_id++) { + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + sdma_rlcx_rb_base = RREG32(sdma_rlc_reg_offset + + regSDMA_RLC0_RB_BASE); + sdma_rlcx_rb_base_hi = RREG32(sdma_rlc_reg_offset + + regSDMA_RLC0_RB_BASE_HI); + + if (m->sdmax_rlcx_rb_base == sdma_rlcx_rb_base && + m->sdmax_rlcx_rb_base_hi == sdma_rlcx_rb_base_hi) { + found = true; + break; + } + } + } + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL); + + /* Read sdma activity counter from utilization register + * if hw queue is enabled, otherwise read from MQD. + */ + if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK) + *val = (uint64_t)RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_UTILIZATION_HI) << 32 | + RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_UTILIZATION_LO); + else + *val = (uint64_t)m->sdmax_rlcx_utilization_hi << 32 | + m->sdmax_rlcx_utilization_lo; + + return 0; +} + const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, @@ -623,5 +671,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .trigger_pc_sample_trap = kgd_v9_4_3_trigger_pc_sample_trap, .override_core_cg = kgd_gfx_v9_4_3_override_core_cg, .setup_stoch_sampling = kgd_v9_4_3_setup_stoch_sampling, - .ptl_ctrl = kgd_v9_4_3_ptl_ctrl + .ptl_ctrl = kgd_v9_4_3_ptl_ctrl, + .hqd_sdma_get_counter = kgd_gfx_v9_4_3_hqd_sdma_get_counter }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b934863312d0..a65161659f74 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1067,8 +1067,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, /* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { - retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, - &sdma_val); + if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))) + retval = read_sdma_queue_counter( + (uint64_t __user *)q->properties.read_ptr, + &sdma_val); + else + retval = dqm->dev->kfd2kgd->hqd_sdma_get_counter ? + dqm->dev->kfd2kgd->hqd_sdma_get_counter( + dqm->dev->adev, q->mqd, &sdma_val) : + 0; if (retval) dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", q->properties.queue_id); @@ -2728,8 +2735,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { - retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, - &sdma_val); + if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)) + retval = read_sdma_queue_counter( + (uint64_t __user *)q->properties.read_ptr, + &sdma_val); + else + retval = dqm->dev->kfd2kgd->hqd_sdma_get_counter ? + dqm->dev->kfd2kgd->hqd_sdma_get_counter( + dqm->dev->adev, q->mqd, &sdma_val) : + 0; + if (retval) dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", q->properties.queue_id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 0be2fd04e6d0..911f974e6bf5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -95,6 +95,7 @@ struct kfd_sdma_activity_handler_workarea { struct temp_sdma_queue_list { uint64_t __user *rptr; + void *mqd; uint64_t sdma_val; unsigned int queue_id; struct list_head list; @@ -165,6 +166,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work) INIT_LIST_HEAD(&sdma_q->list); sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr; + sdma_q->mqd = q->mqd; sdma_q->queue_id = q->properties.queue_id; list_add_tail(&sdma_q->list, &sdma_q_list.list); } @@ -193,7 +195,16 @@ static void kfd_sdma_activity_worker(struct work_struct *work) list_for_each_entry(sdma_q, &sdma_q_list.list, list) { val = 0; - ret = read_sdma_queue_counter(sdma_q->rptr, &val); + + if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))) + ret = read_sdma_queue_counter(sdma_q->rptr, &val); + else + ret = dqm->dev->kfd2kgd->hqd_sdma_get_counter ? + dqm->dev->kfd2kgd->hqd_sdma_get_counter( + dqm->dev->adev, + sdma_q->mqd, &val) : + 0; + if (ret) { pr_debug("Failed to read SDMA queue active counter for queue id: %d", sdma_q->queue_id); diff --git a/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h index ead81aeffd67..8700f8190c7c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h @@ -493,6 +493,10 @@ #define regSDMA_RLC0_MIDCMD_DATA10_BASE_IDX 0 #define regSDMA_RLC0_MIDCMD_CNTL 0x017b #define regSDMA_RLC0_MIDCMD_CNTL_BASE_IDX 0 +#define regSDMA_RLC0_UTILIZATION_LO 0x017c +#define regSDMA_RLC0_UTILIZATION_LO_BASE_IDX 0 +#define regSDMA_RLC0_UTILIZATION_HI 0x017d +#define regSDMA_RLC0_UTILIZATION_HI_BASE_IDX 0 #define regSDMA_RLC1_RB_CNTL 0x0188 #define regSDMA_RLC1_RB_CNTL_BASE_IDX 0 #define regSDMA_RLC1_RB_BASE 0x0189 diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index d34c869b182f..f3220794c108 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -361,6 +361,8 @@ struct kfd2kgd_calls { uint32_t *ptl_state, enum amdgpu_ptl_fmt *fmt1, enum amdgpu_ptl_fmt *fmt2); + int (*hqd_sdma_get_counter)(struct amdgpu_device *adev, + void *mqd, uint64_t *val); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index a2f81b9c38af..e0d387f08576 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -69,8 +69,8 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; - uint32_t reserved_44; - uint32_t reserved_45; + uint32_t sdmax_rlcx_utilization_lo; + uint32_t sdmax_rlcx_utilization_hi; uint32_t reserved_46; uint32_t reserved_47; uint32_t reserved_48; -- 2.34.1
