Since gfx 9.4.3, HW calculates the accumulated activity counter
per queue in the registers sdmax_rlcx_utilization_hi/lo. CPFW stores it
in the SDMA MQD on save/restore, and KFD reads it from there. gfx 9.4.2
and earlier keep reading the counter from memory at rptr+8.
Signed-off-by: Eric Huang <[email protected]>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 26 +++++++++++++------
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 1 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 18 +++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 ++++++++++-
drivers/gpu/drm/amd/include/v9_structs.h | 4 +--
5 files changed, 53 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4c2378bbdc95..1a69091aa695 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1064,8 +1064,14 @@ static int destroy_queue_nocpsch(struct
device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
- retval = read_sdma_queue_counter((uint64_t __user
*)q->properties.read_ptr,
- &sdma_val);
+ if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))
+ retval = read_sdma_queue_counter((uint64_t __user
*)q->properties.read_ptr,
+ &sdma_val);
+ else
+ retval = mqd_mgr->read_sdma_counter ?
+ mqd_mgr->read_sdma_counter(q->mqd, &sdma_val) :
+ 0;
+
if (retval)
dev_err(dev, "Failed to read SDMA queue counter for
queue: %d\n",
q->properties.queue_id);
@@ -2689,7 +2695,8 @@ static int destroy_queue_cpsch(struct
device_queue_manager *dqm,
struct queue *q)
{
int retval;
- struct mqd_manager *mqd_mgr;
+ struct mqd_manager *mqd_mgr =
+ dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct device *dev = dqm->dev->adev->dev;
@@ -2697,8 +2704,14 @@ static int destroy_queue_cpsch(struct
device_queue_manager *dqm,
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
- retval = read_sdma_queue_counter((uint64_t __user
*)q->properties.read_ptr,
- &sdma_val);
+ if (KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2))
+ retval = read_sdma_queue_counter((uint64_t __user
*)q->properties.read_ptr,
+ &sdma_val);
+ else
+ retval = mqd_mgr->read_sdma_counter ?
+ mqd_mgr->read_sdma_counter(q->mqd, &sdma_val) :
+ 0;
+
if (retval)
dev_err(dev, "Failed to read SDMA queue counter for
queue: %d\n",
q->properties.queue_id);
@@ -2724,9 +2737,6 @@ static int destroy_queue_cpsch(struct
device_queue_manager *dqm,
}
- mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
- q->properties.type)];
-
deallocate_doorbell(qpd, q);
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 06ca6235ff1b..7d3b801ea6e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -123,6 +123,7 @@ struct mqd_manager {
bool (*check_preemption_failed)(struct mqd_manager *mm, void
*mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
+ int (*read_sdma_counter)(void *mqd, uint64_t *val);
struct mutex mqd_mutex;
struct kfd_node *dev;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index a04102fd2fb7..fe3a676d734f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -959,6 +959,23 @@ static int get_wave_state_v9_4_3(struct
mqd_manager *mm, void *mqd,
return err;
}
+static int read_sdma_counter(void *mqd, uint64_t *val)
+{
+ struct v9_sdma_mqd *m = get_sdma_mqd(mqd);
+
+ if (!m)
+ return -EINVAL;
+
+ /* Since GC 9.4.3, CPFW saves/restores the accumulated SDMA
+ * queue activity counter in the MQD when the queue is
+ * unmapped/mapped; join the hi/lo 32-bit halves into *val.
+ */
+ *val = ((uint64_t)m->sdmax_rlcx_utilization_hi << 32) |
+ m->sdmax_rlcx_utilization_lo;
+
+ return 0;
+}
+
#if defined(CONFIG_DEBUG_FS)
static int debugfs_show_mqd(struct seq_file *m, void *data)
@@ -1072,6 +1089,7 @@ struct mqd_manager *mqd_manager_init_v9(enum
KFD_MQD_TYPE type,
mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
+ mqd->read_sdma_counter = read_sdma_counter;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 419bb8086ccd..270f253213e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -91,6 +91,7 @@ struct kfd_sdma_activity_handler_workarea {
struct temp_sdma_queue_list {
uint64_t __user *rptr;
+ void *mqd;
uint64_t sdma_val;
unsigned int queue_id;
struct list_head list;
@@ -108,6 +109,7 @@ static void kfd_sdma_activity_worker(struct
work_struct *work)
int ret = 0;
struct temp_sdma_queue_list sdma_q_list;
struct temp_sdma_queue_list *sdma_q, *next;
+ struct mqd_manager *mqd_mgr;
workarea = container_of(work, struct
kfd_sdma_activity_handler_workarea,
sdma_activity_work);
@@ -119,6 +121,9 @@ static void kfd_sdma_activity_worker(struct
work_struct *work)
qpd = &pdd->qpd;
if (!dqm || !qpd)
return;
+
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
+
/*
* Total SDMA activity is current SDMA activity + past SDMA
activity
* Past SDMA count is stored in pdd.
@@ -161,6 +166,7 @@ static void kfd_sdma_activity_worker(struct
work_struct *work)
INIT_LIST_HEAD(&sdma_q->list);
sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
+ sdma_q->mqd = q->mqd;
sdma_q->queue_id = q->properties.queue_id;
list_add_tail(&sdma_q->list, &sdma_q_list.list);
}
@@ -189,7 +195,14 @@ static void kfd_sdma_activity_worker(struct
work_struct *work)
list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
val = 0;
- ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+
+ if ((KFD_GC_VERSION(dqm->dev) <= IP_VERSION(9, 4, 2)))
+ ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+ else
+ ret = mqd_mgr->read_sdma_counter ?
+ mqd_mgr->read_sdma_counter(sdma_q->mqd, &val) :
+ 0;
+
if (ret) {
pr_debug("Failed to read SDMA queue active counter for
queue id: %d",
sdma_q->queue_id);
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h
b/drivers/gpu/drm/amd/include/v9_structs.h
index a2f81b9c38af..e0d387f08576 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -69,8 +69,8 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_midcmd_cntl;
uint32_t reserved_42;
uint32_t reserved_43;
- uint32_t reserved_44;
- uint32_t reserved_45;
+ uint32_t sdmax_rlcx_utilization_lo;
+ uint32_t sdmax_rlcx_utilization_hi;
uint32_t reserved_46;
uint32_t reserved_47;
uint32_t reserved_48;