Track SDMA usage on a per process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in-use by this process since the creation of the process.
This value is in microsecond granularity.

v2:
- Remove unnecessary checks for pdd in kfd_procfs_show().
- Make counter part of the kfd_sdma_activity_handler_workarea 
  structure.

Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 ++++++++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  16 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 130 ++++++++++++++++--
 4 files changed, 191 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..49f72d0f7be7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager 
*dqm,
                dqm->active_cp_queue_count--;
 }
 
+/*
+ * Copy SDMA queue @q's activity counter from user memory into @val.
+ * Returns 0 on success (the only case that writes @val) or a negative errno.
+ */
+int read_sdma_queue_counter(struct queue *q, uint64_t *val)
+{
+       uint64_t __user *counter;
+       uint64_t tmp = 0;
+       int ret;
+
+       if (!q || !val)
+               return -EINVAL;
+       /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
+       counter = (uint64_t __user *)((uint64_t)q->properties.read_ptr +
+                                     sizeof(uint64_t));
+       if (!access_ok(counter, sizeof(*counter))) {
+               pr_err("Can't access sdma queue activity counter\n");
+               return -EFAULT;
+       }
+       ret = get_user(tmp, counter);
+       if (!ret)
+               *val = tmp;
+       return ret;
+}
+
+/* Add SDMA queue @q's activity count to @pdd's past-activity total. */
+static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
+                                                struct queue *q)
+{
+       uint64_t val = 0;
+       int ret;
+
+       if (!pdd)
+               return -ENODEV;
+       if (!q) /* the error path below dereferences q */
+               return -EINVAL;
+       ret = read_sdma_queue_counter(q, &val);
+       if (!ret)
+               WRITE_ONCE(pdd->sdma_past_activity_counter,
+                          pdd->sdma_past_activity_counter + val);
+       else
+               pr_err("Failed to read SDMA queue counter for queue: %d\n",
+                               q->properties.queue_id);
+       return ret;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
        struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
        if (retval == -ETIME)
                qpd->reset_wavefronts = true;
 
+       /* Get the SDMA queue stats */
+        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+                update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+        }
+
        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
        list_del(&q->list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
                }
        }
 
+       /* Get the SDMA queue stats */
+       if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+           (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+               update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+       }
        /*
         * Unconditionally decrement this counter, regardless of the queue's
         * type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..894bcf877f9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager 
*dqm)
        mutex_unlock(&dqm->lock_hidden);
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val);
+
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f70f789c3cb3..fae139b77c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -633,7 +633,14 @@ enum kfd_pdd_bound {
        PDD_BOUND_SUSPENDED,
 };
 
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR  100
 
 /* Data that is per-process-per device. */
 struct kfd_process_device {
@@ -681,7 +688,12 @@ struct kfd_process_device {
        /* VRAM usage */
        uint64_t vram_usage;
        struct attribute attr_vram;
-       char vram_filename[MAX_VRAM_FILENAME_LEN];
+       char vram_filename[MAX_SYSFS_FILENAME_LEN];
+
+       /* SDMA activity tracking */
+       uint64_t sdma_past_activity_counter;
+       struct attribute attr_sdma;
+       char sdma_filename[MAX_SYSFS_FILENAME_LEN];
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d27221ddcdeb..e2ebed5c033f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/mmu_context.h>
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
@@ -76,6 +77,66 @@ struct kfd_procfs_tree {
 
 static struct kfd_procfs_tree procfs;
 
+/*
+ * Structure for SDMA activity tracking: work item, its input and its result
+ */
+struct kfd_sdma_activity_handler_workarea {
+       struct work_struct sdma_activity_work; /* runs kfd_sdma_activity_worker */
+       struct kfd_process_device *pdd; /* in: per-device data to inspect */
+       uint64_t sdma_current_activity_counter; /* out: sum over SDMA queues */
+};
+
+/*
+ * Work item: sum the activity counters of all SDMA queues owned by
+ * workarea->pdd into workarea->sdma_current_activity_counter. The owning
+ * process' mm is adopted because the counters are fetched with get_user().
+ */
+static void kfd_sdma_activity_worker(struct work_struct *work)
+{
+       struct kfd_sdma_activity_handler_workarea *workarea;
+       struct kfd_process_device *pdd;
+       struct device_queue_manager *dqm;
+       struct mm_struct *mm;
+       struct queue *q;
+       uint64_t val;
+
+       workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
+                               sdma_activity_work);
+       /*
+        * kfd_procfs_show() reads this total after flush_work() no matter
+        * how the worker exits, so define it before any early return.
+        */
+       workarea->sdma_current_activity_counter = 0;
+
+       /* Validate pdd before it is dereferenced to reach the dqm. */
+       pdd = workarea->pdd;
+       if (!pdd || !pdd->dev->dqm)
+               return;
+       dqm = pdd->dev->dqm;
+
+       mm = get_task_mm(pdd->process->lead_thread);
+       if (!mm)
+               return;
+       use_mm(mm);
+
+       dqm_lock(dqm);
+       list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+               if (q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+                   q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)
+                       continue;
+               val = 0;
+               if (read_sdma_queue_counter(q, &val))
+                       pr_debug("Failed to read SDMA queue active "
+                                "counter for queue id: %d",
+                                q->properties.queue_id);
+               else
+                       workarea->sdma_current_activity_counter += val;
+       }
+       dqm_unlock(dqm);
+       unuse_mm(mm);
+       mmput(mm);
+}
+
 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
                               char *buffer)
 {
@@ -87,8 +148,25 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct 
attribute *attr,
        } else if (strncmp(attr->name, "vram_", 5) == 0) {
                struct kfd_process_device *pdd = container_of(attr, struct 
kfd_process_device,
                                                              attr_vram);
-               if (pdd)
-                       return snprintf(buffer, PAGE_SIZE, "%llu\n", 
READ_ONCE(pdd->vram_usage));
+               return snprintf(buffer, PAGE_SIZE, "%llu\n", 
READ_ONCE(pdd->vram_usage));
+       } else if (strncmp(attr->name, "sdma_", 5) == 0) {
+               struct kfd_process_device *pdd = container_of(attr, struct 
kfd_process_device,
+                                                             attr_sdma);
+               struct kfd_sdma_activity_handler_workarea 
sdma_activity_work_handler;
+
+               INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
+                                       kfd_sdma_activity_worker);
+
+               sdma_activity_work_handler.pdd = pdd;
+
+               schedule_work(&sdma_activity_work_handler.sdma_activity_work);
+
+               flush_work(&sdma_activity_work_handler.sdma_activity_work);
+
+               return snprintf(buffer, PAGE_SIZE, "%llu\n",
+                       (READ_ONCE(pdd->sdma_past_activity_counter) +
+                        
sdma_activity_work_handler.sdma_current_activity_counter)/
+                        SDMA_ACTIVITY_DIVISOR);
        } else {
                pr_err("Invalid attribute");
                return -EINVAL;
@@ -210,7 +288,24 @@ int kfd_procfs_add_queue(struct queue *q)
        return 0;
 }
 
-int kfd_procfs_add_vram_usage(struct kfd_process *p)
+/* Name @attr, give it the KFD sysfs mode and add it under @p's kobject. */
+static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
+                                char *name)
+{
+       /* All three arguments are mandatory. */
+       if (!(p && attr && name))
+               return -EINVAL;
+
+       /* Fill in the attribute before it is registered. */
+       attr->name = name;
+       attr->mode = KFD_SYSFS_FILE_MODE;
+       sysfs_attr_init(attr);
+
+       /* Hand sysfs_create_file()'s status back to the caller unchanged. */
+       return sysfs_create_file(p->kobj, attr);
+}
+
+int kfd_procfs_add_sysfs_files(struct kfd_process *p)
 {
        int ret = 0;
        struct kfd_process_device *pdd;
@@ -221,17 +316,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
        if (!p->kobj)
                return -EFAULT;
 
-       /* Create proc/<pid>/vram_<gpuid> file for each GPU */
+       /*
+        * Create sysfs files for each GPU:
+        * - proc/<pid>/vram_<gpuid>
+        * - proc/<pid>/sdma_<gpuid>
+        */
        list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-               snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+               snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
                         pdd->dev->id);
-               pdd->attr_vram.name = pdd->vram_filename;
-               pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
-               sysfs_attr_init(&pdd->attr_vram);
-               ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+               ret = kfd_sysfs_create_file(p, &pdd->attr_vram, 
pdd->vram_filename);
                if (ret)
                        pr_warn("Creating vram usage for gpu id %d failed",
                                (int)pdd->dev->id);
+
+               snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
+                        pdd->dev->id);
+               ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, 
pdd->sdma_filename);
+               if (ret)
+                       pr_warn("Creating sdma usage for gpu id %d failed",
+                               (int)pdd->dev->id);
        }
 
        return ret;
@@ -444,9 +547,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
                if (!process->kobj_queues)
                        pr_warn("Creating KFD proc/queues folder failed");
 
-               ret = kfd_procfs_add_vram_usage(process);
+               ret = kfd_procfs_add_sysfs_files(process);
                if (ret)
-                       pr_warn("Creating vram usage file for pid %d failed",
+                       pr_warn("Creating sysfs usage file for pid %d failed",
                                (int)process->lead_thread->pid);
        }
 out:
@@ -597,8 +700,10 @@ static void kfd_process_wq_release(struct work_struct 
*work)
                kobject_put(p->kobj_queues);
                p->kobj_queues = NULL;
 
-               list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+               list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
                        sysfs_remove_file(p->kobj, &pdd->attr_vram);
+                       sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+               }
 
                kobject_del(p->kobj);
                kobject_put(p->kobj);
@@ -906,6 +1011,7 @@ struct kfd_process_device 
*kfd_create_process_device_data(struct kfd_dev *dev,
        pdd->already_dequeued = false;
        pdd->runtime_inuse = false;
        pdd->vram_usage = 0;
+       pdd->sdma_past_activity_counter = 0;
        list_add(&pdd->per_device_list, &p->per_device_data);
 
        /* Init idr used for memory handle translation */
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to