Am 2020-05-26 um 1:26 p.m. schrieb Mukul Joshi:
> Track SDMA usage on a per process basis and report it through sysfs.
> The value in the sysfs file indicates the amount of time SDMA has
> been in-use by this process since the creation of the process.
> This value is in microsecond granularity.
>
> v2:
> - Remove unnecessary checks for pdd is kfd_procfs_show().
> - Make counter part of the kfd_sdma_activity_handler_workarea
>   structure.
>
> v3:
> - Remove READ_ONCE/WRITE_ONCE while updating acitivty

Typo: activity


>   counter.
> - Add updation of past acitivt counter under dqm_lock.

Typo: activity

Other than that, the patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


>
> Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 ++++++++
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  16 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 137 ++++++++++++++++--
>  4 files changed, 198 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e9c4867abeff..6293017bd5bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager 
> *dqm,
>               dqm->active_cp_queue_count--;
>  }
>  
> +int read_sdma_queue_counter(struct queue *q, uint64_t *val)
> +{
> +     int ret;
> +     uint64_t tmp = 0;
> +
> +     if (!q || !val)
> +             return -EINVAL;
> +     /*
> +      * SDMA activity counter is stored at queue's RPTR + 0x8 location.
> +      */
> +     if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
> +                                     sizeof(uint64_t)), sizeof(uint64_t))) {
> +             pr_err("Can't access sdma queue activity counter\n");
> +             return -EFAULT;
> +     }
> +
> +     ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
> +                                                 sizeof(uint64_t)));
> +     if (!ret) {
> +             *val = tmp;
> +     }
> +
> +     return ret;
> +}
> +
> +static int update_sdma_queue_past_activity_stats(struct kfd_process_device 
> *pdd,
> +                                              struct queue *q)
> +{
> +     int ret;
> +     uint64_t val = 0;
> +
> +     if (!pdd)
> +             return -ENODEV;
> +
> +     ret = read_sdma_queue_counter(q, &val);
> +     if (ret) {
> +             pr_err("Failed to read SDMA queue counter for queue: %d\n",
> +                             q->properties.queue_id);
> +             return ret;
> +     }
> +
> +     pdd->sdma_past_activity_counter += val;
> +
> +     return ret;
> +}
> +
>  static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
>  {
>       struct kfd_dev *dev = qpd->dqm->dev;
> @@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct 
> device_queue_manager *dqm,
>       if (retval == -ETIME)
>               qpd->reset_wavefronts = true;
>  
> +     /* Get the SDMA queue stats */
> +        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +                update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
> +        }
> +
>       mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>  
>       list_del(&q->list);
> @@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct 
> device_queue_manager *dqm,
>               }
>       }
>  
> +     /* Get the SDMA queue stats */
> +     if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +         (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +             update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
> +     }
>       /*
>        * Unconditionally decrement this counter, regardless of the queue's
>        * type
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 4afa015c69b1..894bcf877f9e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager 
> *dqm)
>       mutex_unlock(&dqm->lock_hidden);
>  }
>  
> +int read_sdma_queue_counter(struct queue *q, uint64_t *val);
> +
>  #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index f70f789c3cb3..fae139b77c0a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -633,7 +633,14 @@ enum kfd_pdd_bound {
>       PDD_BOUND_SUSPENDED,
>  };
>  
> -#define MAX_VRAM_FILENAME_LEN 11
> +#define MAX_SYSFS_FILENAME_LEN 11
> +
> +/*
> + * SDMA counter runs at 100MHz frequency.
> + * We display SDMA activity in microsecond granularity in sysfs.
> + * As a result, the divisor is 100.
> + */
> +#define SDMA_ACTIVITY_DIVISOR  100
>  
>  /* Data that is per-process-per device. */
>  struct kfd_process_device {
> @@ -681,7 +688,12 @@ struct kfd_process_device {
>       /* VRAM usage */
>       uint64_t vram_usage;
>       struct attribute attr_vram;
> -     char vram_filename[MAX_VRAM_FILENAME_LEN];
> +     char vram_filename[MAX_SYSFS_FILENAME_LEN];
> +
> +     /* SDMA activity tracking */
> +     uint64_t sdma_past_activity_counter;
> +     struct attribute attr_sdma;
> +     char sdma_filename[MAX_SYSFS_FILENAME_LEN];
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index d27221ddcdeb..db010c5da144 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -25,6 +25,7 @@
>  #include <linux/sched.h>
>  #include <linux/sched/mm.h>
>  #include <linux/sched/task.h>
> +#include <linux/mmu_context.h>
>  #include <linux/slab.h>
>  #include <linux/amd-iommu.h>
>  #include <linux/notifier.h>
> @@ -76,6 +77,74 @@ struct kfd_procfs_tree {
>  
>  static struct kfd_procfs_tree procfs;
>  
> +/*
> + * Structure for SDMA activity tracking
> + */
> +struct kfd_sdma_activity_handler_workarea {
> +     struct work_struct sdma_activity_work;
> +     struct kfd_process_device *pdd;
> +     uint64_t sdma_activity_counter;
> +};
> +
> +static void kfd_sdma_activity_worker(struct work_struct *work)
> +{
> +     struct kfd_sdma_activity_handler_workarea *workarea;
> +     struct kfd_process_device *pdd;
> +     uint64_t val;
> +     struct mm_struct *mm;
> +     struct queue *q;
> +     struct qcm_process_device *qpd;
> +     struct device_queue_manager *dqm;
> +     int ret = 0;
> +
> +     workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
> +                             sdma_activity_work);
> +     if (!workarea)
> +             return;
> +
> +     pdd = workarea->pdd;
> +     dqm = pdd->dev->dqm;
> +     qpd = &pdd->qpd;
> +
> +     if (!pdd || !dqm || !qpd)
> +             return;
> +
> +     mm = get_task_mm(pdd->process->lead_thread);
> +     if (!mm) {
> +             return;
> +     }
> +
> +     use_mm(mm);
> +
> +     dqm_lock(dqm);
> +
> +     /*
> +      * Total SDMA activity is current SDMA activity + past SDMA activity
> +      */
> +     workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
> +
> +     /*
> +      * Get the current activity counters for all active SDMA queues
> +      */
> +     list_for_each_entry(q, &qpd->queues_list, list) {
> +             if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +                 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +                     val = 0;
> +                     ret = read_sdma_queue_counter(q, &val);
> +                     if (ret)
> +                             pr_debug("Failed to read SDMA queue active "
> +                                      "counter for queue id: %d",
> +                                      q->properties.queue_id);
> +                     else
> +                             workarea->sdma_activity_counter += val;
> +             }
> +     }
> +
> +     dqm_unlock(dqm);
> +     unuse_mm(mm);
> +     mmput(mm);
> +}
> +
>  static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>                              char *buffer)
>  {
> @@ -87,8 +156,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, 
> struct attribute *attr,
>       } else if (strncmp(attr->name, "vram_", 5) == 0) {
>               struct kfd_process_device *pdd = container_of(attr, struct 
> kfd_process_device,
>                                                             attr_vram);
> -             if (pdd)
> -                     return snprintf(buffer, PAGE_SIZE, "%llu\n", 
> READ_ONCE(pdd->vram_usage));
> +             return snprintf(buffer, PAGE_SIZE, "%llu\n", 
> READ_ONCE(pdd->vram_usage));
> +     } else if (strncmp(attr->name, "sdma_", 5) == 0) {
> +             struct kfd_process_device *pdd = container_of(attr, struct 
> kfd_process_device,
> +                                                           attr_sdma);
> +             struct kfd_sdma_activity_handler_workarea 
> sdma_activity_work_handler;
> +
> +             INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
> +                                     kfd_sdma_activity_worker);
> +
> +             sdma_activity_work_handler.pdd = pdd;
> +
> +             schedule_work(&sdma_activity_work_handler.sdma_activity_work);
> +
> +             flush_work(&sdma_activity_work_handler.sdma_activity_work);
> +
> +             return snprintf(buffer, PAGE_SIZE, "%llu\n",
> +                             
> (sdma_activity_work_handler.sdma_activity_counter)/
> +                              SDMA_ACTIVITY_DIVISOR);
>       } else {
>               pr_err("Invalid attribute");
>               return -EINVAL;
> @@ -210,7 +295,24 @@ int kfd_procfs_add_queue(struct queue *q)
>       return 0;
>  }
>  
> -int kfd_procfs_add_vram_usage(struct kfd_process *p)
> +static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute 
> *attr,
> +                              char *name)
> +{
> +     int ret = 0;
> +
> +     if (!p || !attr || !name)
> +             return -EINVAL;
> +
> +     attr->name = name;
> +     attr->mode = KFD_SYSFS_FILE_MODE;
> +     sysfs_attr_init(attr);
> +
> +     ret = sysfs_create_file(p->kobj, attr);
> +
> +     return ret;
> +}
> +
> +int kfd_procfs_add_sysfs_files(struct kfd_process *p)
>  {
>       int ret = 0;
>       struct kfd_process_device *pdd;
> @@ -221,17 +323,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
>       if (!p->kobj)
>               return -EFAULT;
>  
> -     /* Create proc/<pid>/vram_<gpuid> file for each GPU */
> +     /*
> +      * Create sysfs files for each GPU:
> +      * - proc/<pid>/vram_<gpuid>
> +      * - proc/<pid>/sdma_<gpuid>
> +      */
>       list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> -             snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
> +             snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
>                        pdd->dev->id);
> -             pdd->attr_vram.name = pdd->vram_filename;
> -             pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
> -             sysfs_attr_init(&pdd->attr_vram);
> -             ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
> +             ret = kfd_sysfs_create_file(p, &pdd->attr_vram, 
> pdd->vram_filename);
>               if (ret)
>                       pr_warn("Creating vram usage for gpu id %d failed",
>                               (int)pdd->dev->id);
> +
> +             snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
> +                      pdd->dev->id);
> +             ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, 
> pdd->sdma_filename);
> +             if (ret)
> +                     pr_warn("Creating sdma usage for gpu id %d failed",
> +                             (int)pdd->dev->id);
>       }
>  
>       return ret;
> @@ -444,9 +554,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
>               if (!process->kobj_queues)
>                       pr_warn("Creating KFD proc/queues folder failed");
>  
> -             ret = kfd_procfs_add_vram_usage(process);
> +             ret = kfd_procfs_add_sysfs_files(process);
>               if (ret)
> -                     pr_warn("Creating vram usage file for pid %d failed",
> +                     pr_warn("Creating sysfs usage file for pid %d failed",
>                               (int)process->lead_thread->pid);
>       }
>  out:
> @@ -597,8 +707,10 @@ static void kfd_process_wq_release(struct work_struct 
> *work)
>               kobject_put(p->kobj_queues);
>               p->kobj_queues = NULL;
>  
> -             list_for_each_entry(pdd, &p->per_device_data, per_device_list)
> +             list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
>                       sysfs_remove_file(p->kobj, &pdd->attr_vram);
> +                     sysfs_remove_file(p->kobj, &pdd->attr_sdma);
> +             }
>  
>               kobject_del(p->kobj);
>               kobject_put(p->kobj);
> @@ -906,6 +1018,7 @@ struct kfd_process_device 
> *kfd_create_process_device_data(struct kfd_dev *dev,
>       pdd->already_dequeued = false;
>       pdd->runtime_inuse = false;
>       pdd->vram_usage = 0;
> +     pdd->sdma_past_activity_counter = 0;
>       list_add(&pdd->per_device_list, &p->per_device_data);
>  
>       /* Init idr used for memory handle translation */
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to