Add skeleton support that provides a ring buffer for SPM data collection, and create a work queue to dump the collected data to the user buffer.
Signed-off-by: James Zhu <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c      | 127 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h      |   6 +
 3 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
index 369833f6e684..703a2a9bb8b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
@@ -27,6 +27,7 @@
 
 #include "amdgpu_spm.h"
 
+#define AMDGPU_XCC_MASK(adev) ((1U << NUM_XCC(adev->gfx.xcc_mask)) - 1)
 #define AMDGPU_XCP_ID(x) (x == AMDGPU_XCP_NO_PARTITION ? 0 : x)
 #define fpriv_to_prof_mgr(fpriv) (&fpriv->userq_mgr.adev->prof_mgr)
 #define fpriv_to_adev(fpriv) fpriv->userq_mgr.adev
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
index e7d16359551f..1ac3668c9744 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
@@ -29,11 +29,40 @@
  * 0.1 - Initial revision
  */
 
+struct amdgpu_spm_base {
+	u64 gpu_addr;
+	u32 ring_size;
+	u32 *cpu_addr;
+	void *spm_obj;
+};
+struct amdgpu_spm_cntr {
+	struct amdgpu_spm_base spm[MAX_XCP];
+	struct mutex spm_worker_mutex;
+};
 static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp);
+static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp);
+
+
+static void amdgpu_spm_work(struct work_struct *work)
+{
+	struct amdgpu_spm_mgr *spm_mgr = container_of(work, struct amdgpu_spm_mgr, spm_work);
+	struct mm_struct *mm = NULL;
+
+	mm = get_task_mm(spm_mgr->lead_thread);
+	if (mm) {
+		kthread_use_mm(mm);
+		{ /* attach mm */
+			/* TODO: dump spm ring buffer to user buffer */
+		} /* detach mm */
+		kthread_unuse_mm(mm);
+		/* release the mm structure */
+		mmput(mm);
+	}
+}
 
 static void amdgpu_spm_init_device(struct amdgpu_spm_mgr *spm_mgr)
 {
-	/* TODO */
+	spm_mgr->spm_cntr = NULL;
 }
 
 static void amdgpu_spm_release_device(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
@@ -41,6 +70,96 @@ static void amdgpu_spm_release_device(struct amdgpu_spm_mgr *spm_mgr, struct drm
 	amdgpu_spm_release(spm_mgr, filp);
 }
 
+static int _amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp)
+{
+	struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr);
+	struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]);
+	int ret = 0;
+
+	/* allocate 4M spm ring buffer */
+	spm->ring_size = order_base_2(4 * 1024 * 1024 / 4);
+	spm->ring_size = (1 << spm->ring_size) * 4;
+
+	ret = amdgpu_vm_alloc_gtt_mem(adev,
+			spm->ring_size, &spm->spm_obj,
+			&spm->gpu_addr, (void *)&spm->cpu_addr,
+			false, false);
+
+	if (ret)
+		goto out;
+
+	ret = amdgpu_rlc_spm_acquire(adev, inst, drm_priv_to_vm(filp),
+			spm->gpu_addr, spm->ring_size);
+
+	if (ret)
+		goto rlc_spm_acquire_failure;
+
+	/*
+	 * By definition, the last 8 DWs of the buffer are not part of the rings
+	 * and are instead part of the Meta data area.
+	 */
+	spm->ring_size -= 0x20;
+
+	goto out;
+
+rlc_spm_acquire_failure:
+	amdgpu_vm_free_gtt_mem(adev, &spm->spm_obj);
+	memset(spm, 0, sizeof(*spm));
+out:
+	return ret;
+}
+
+static int amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
+{
+	struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr);
+	int ret = 0;
+	int inst;
+
+	mutex_lock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+
+	if (spm_mgr->spm_cntr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	spm_mgr->lead_thread = current->group_leader;
+	spm_mgr->spm_cntr = kzalloc(sizeof(struct amdgpu_spm_cntr), GFP_KERNEL);
+	if (!spm_mgr->spm_cntr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for_each_inst(inst, AMDGPU_XCC_MASK(adev)) {
+		ret = _amdgpu_spm_acquire(spm_mgr, inst, filp);
+		if (ret)
+			goto acquire_spm_failure;
+	}
+
+	mutex_init(&spm_mgr->spm_cntr->spm_worker_mutex);
+
+	INIT_WORK(&spm_mgr->spm_work, amdgpu_spm_work);
+
+	spm_mgr->file = filp;
+
+	goto out;
+
+acquire_spm_failure:
+	for_each_inst(inst, AMDGPU_XCC_MASK(adev))
+		_amdgpu_spm_release(spm_mgr, inst, filp);
+	kfree(spm_mgr->spm_cntr);
+	spm_mgr->spm_cntr = NULL;
+
+out:
+	mutex_unlock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+	return ret;
+}
+
+static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp)
+{
+	/* TODO: */
+
+}
+
 static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
 {
 	/* TODO */
@@ -52,9 +171,15 @@ int amdgpu_spm_ioctl(struct drm_device *dev, void __user *data,
 {
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_device *adev = fpriv_to_adev(fpriv);
+	struct amdgpu_profiler_mgr *prof_mgr = fpriv_to_prof_mgr(fpriv);
 	struct drm_amdgpu_spm_args *args = data;
+	struct amdgpu_spm_mgr *spm_mgr =
+		&(prof_mgr->prof_xcp_mgr[AMDGPU_XCP_ID(fpriv->xcp_id)].spm_mgr);
 	switch (args->op) {
+	case AMDGPU_SPM_OP_ACQUIRE:
+		return amdgpu_spm_acquire(spm_mgr, filp);
+
 	default:
 		dev_err(adev->dev,
 			"Invalid option: %i\n", args->op);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
index 3728f227bd61..ee4f03e2f6cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
@@ -25,6 +25,12 @@ struct amdgpu_spm_mgr {
 
 	struct drm_file *file;
+
+	struct task_struct *lead_thread;
+
+	/* spm data */
+	struct amdgpu_spm_cntr *spm_cntr;
+	struct work_struct spm_work;
 };
 
 int amdgpu_spm_ioctl(struct drm_device *dev, void __user *data,
 		     struct drm_file *filp);
-- 
2.34.1
