Since gfx 9.4.3, the HW calculates an accumulated per-queue activity counter in the registers sdmax_rlcx_utilization_hi/lo. CPFW saves and restores it in the SDMA MQD, and KFD reads it from there. gfx 9.4.2 and earlier keep reading the counter from memory at rptr+8.
Signed-off-by: Eric Huang <[email protected]> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 26 +++++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 1 + .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 18 +++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 ++++++++++- drivers/gpu/drm/amd/include/v9_structs.h | 4 +-- 5 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4c2378bbdc95..1a69091aa695 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1064,8 +1064,14 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, /* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { - retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, - &sdma_val); + if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)) + retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, + &sdma_val); + else + retval = mqd_mgr->read_sdma_counter ? 
+ mqd_mgr->read_sdma_counter(q->mqd, &sdma_val) : + 0; + if (retval) dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", q->properties.queue_id); @@ -2689,7 +2695,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd_mgr; + struct mqd_manager *mqd_mgr = + dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; uint64_t sdma_val = 0; struct kfd_process_device *pdd = qpd_to_pdd(qpd); struct device *dev = dqm->dev->adev->dev; @@ -2697,8 +2704,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { - retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, - &sdma_val); + if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)) + retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, + &sdma_val); + else + retval = mqd_mgr->read_sdma_counter ? 
+ mqd_mgr->read_sdma_counter(q->mqd, &sdma_val) : + 0; + if (retval) dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", q->properties.queue_id); @@ -2724,9 +2737,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( - q->properties.type)]; - deallocate_doorbell(qpd, q); if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 06ca6235ff1b..7d3b801ea6e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -123,6 +123,7 @@ struct mqd_manager { bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd); uint64_t (*mqd_stride)(struct mqd_manager *mm, struct queue_properties *p); + int (*read_sdma_counter)(void *mqd, uint64_t *val); struct mutex mqd_mutex; struct kfd_node *dev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index a04102fd2fb7..fe3a676d734f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -959,6 +959,23 @@ static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd, return err; } +static int read_sdma_counter(void *mqd, uint64_t *val) +{ + struct v9_sdma_mqd *m = get_sdma_mqd(mqd); + + if (!m) + return -EINVAL; + + /* Since GC 9.4.3 sdma queue activity accumulated + * counter is saved/restored in MQD by CPFW when + * queue is unmapped/mapped. 
+ */ + *val = ((uint64_t)m->sdmax_rlcx_utilization_hi << 32) | + m->sdmax_rlcx_utilization_lo; + + return 0; +} + #if defined(CONFIG_DEBUG_FS) static int debugfs_show_mqd(struct seq_file *m, void *data) @@ -1072,6 +1089,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->restore_mqd = restore_mqd_sdma; mqd->mqd_size = sizeof(struct v9_sdma_mqd); mqd->mqd_stride = kfd_mqd_stride; + mqd->read_sdma_counter = read_sdma_counter; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 419bb8086ccd..270f253213e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -91,6 +91,7 @@ struct kfd_sdma_activity_handler_workarea { struct temp_sdma_queue_list { uint64_t __user *rptr; + void *mqd; uint64_t sdma_val; unsigned int queue_id; struct list_head list; @@ -108,6 +109,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work) int ret = 0; struct temp_sdma_queue_list sdma_q_list; struct temp_sdma_queue_list *sdma_q, *next; + struct mqd_manager *mqd_mgr; workarea = container_of(work, struct kfd_sdma_activity_handler_workarea, sdma_activity_work); @@ -119,6 +121,9 @@ static void kfd_sdma_activity_worker(struct work_struct *work) qpd = &pdd->qpd; if (!dqm || !qpd) return; + + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; + /* * Total SDMA activity is current SDMA activity + past SDMA activity * Past SDMA count is stored in pdd. 
@@ -161,6 +166,7 @@ static void kfd_sdma_activity_worker(struct work_struct *work) INIT_LIST_HEAD(&sdma_q->list); sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr; + sdma_q->mqd = q->mqd; sdma_q->queue_id = q->properties.queue_id; list_add_tail(&sdma_q->list, &sdma_q_list.list); } @@ -189,7 +195,14 @@ static void kfd_sdma_activity_worker(struct work_struct *work) list_for_each_entry(sdma_q, &sdma_q_list.list, list) { val = 0; - ret = read_sdma_queue_counter(sdma_q->rptr, &val); + + if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))) + ret = read_sdma_queue_counter(sdma_q->rptr, &val); + else + ret = mqd_mgr->read_sdma_counter ? + mqd_mgr->read_sdma_counter(sdma_q->mqd, &val) : + 0; + if (ret) { pr_debug("Failed to read SDMA queue active counter for queue id: %d", sdma_q->queue_id); diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index a2f81b9c38af..e0d387f08576 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -69,8 +69,8 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; - uint32_t reserved_44; - uint32_t reserved_45; + uint32_t sdmax_rlcx_utilization_lo; + uint32_t sdmax_rlcx_utilization_hi; uint32_t reserved_46; uint32_t reserved_47; uint32_t reserved_48; -- 2.34.1
