Provide a user-space buffer for SPM data collection: start a workqueue and the SPM hardware to dump data, wait until the buffer is filled, then wake up and return the data to user space.
Signed-off-by: James Zhu <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c | 223 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h | 1 + 2 files changed, 223 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c index 1ac3668c9744..4b2c7f7b494b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c @@ -29,15 +29,35 @@ * 0.1 - Initial revision */ +#define AMDGPU_SPM_MAJOR_VERSION 0 +#define AMDGPU_SPM_MINOR_VERSION 1 + +struct spm_user_buf { + uint64_t __user *user_addr; + u32 ubufsize; +}; + struct amdgpu_spm_base { + struct spm_user_buf ubuf; u64 gpu_addr; u32 ring_size; + u32 ring_rptr; + u32 size_copied; + u32 has_data_loss; u32 *cpu_addr; void *spm_obj; + bool has_user_buf; + bool is_user_buf_filled; + bool is_spm_started; }; + struct amdgpu_spm_cntr { struct amdgpu_spm_base spm[MAX_XCP]; + int spm_use_cnt; struct mutex spm_worker_mutex; + wait_queue_head_t spm_buf_wq; + u32 have_users_buf_cnt; + bool are_users_buf_filled; }; static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp); static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp); @@ -133,12 +153,16 @@ static int amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *f ret = _amdgpu_spm_acquire(spm_mgr, inst, filp); if (ret) goto acquire_spm_failure; + spm_mgr->spm_cntr->spm_use_cnt++; } + spm_mgr->spm_cntr->have_users_buf_cnt = 0; mutex_init(&spm_mgr->spm_cntr->spm_worker_mutex); + init_waitqueue_head(&spm_mgr->spm_cntr->spm_buf_wq); INIT_WORK(&spm_mgr->spm_work, amdgpu_spm_work); + spin_lock_init(&spm_mgr->spm_irq_lock); spm_mgr->file = filp; goto out; @@ -166,6 +190,196 @@ static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *f return 0; } +static int spm_update_dest_info(struct amdgpu_spm_mgr *spm_mgr, + int inst, struct drm_amdgpu_spm_args 
*user_spm_data, + struct drm_amdgpu_spm_args *user_spm_ptr) +{ + struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]); + int ret = 0; + + mutex_lock(&spm_mgr->spm_cntr->spm_worker_mutex); + if (spm->has_user_buf) { + struct drm_amdgpu_spm_buffer_header spm_header; + uint64_t __user *user_address; + + user_spm_ptr->bytes_copied += spm->size_copied; + user_spm_ptr->has_data_loss += spm->has_data_loss; + + memset(&spm_header, 0, sizeof(spm_header)); + user_address = (uint64_t *)((uint64_t)spm->ubuf.user_addr - sizeof(spm_header)); + spm_header.version = AMDGPU_SPM_MAJOR_VERSION << 24 | + AMDGPU_SPM_MINOR_VERSION; + spm_header.bytes_copied = spm->size_copied; + spm_header.has_data_loss = spm->has_data_loss; + spm->has_user_buf = false; + spm_mgr->spm_cntr->have_users_buf_cnt--; + + ret = copy_to_user(user_address, &spm_header, sizeof(spm_header)); + if (ret) { + ret = -EFAULT; + goto out; + } + } + if (user_spm_data->dest_buf) { + spm->ubuf.user_addr = (uint64_t *)user_spm_data->dest_buf; + spm->ubuf.ubufsize = user_spm_data->buf_size; + /* reserve space for drm_amdgpu_spm_buffer_header */ + spm->ubuf.user_addr = (uint64_t *)((uint64_t)spm->ubuf.user_addr + + sizeof(struct drm_amdgpu_spm_buffer_header)); + spm->ubuf.ubufsize -= sizeof(struct drm_amdgpu_spm_buffer_header); + spm->has_data_loss = false; + spm->size_copied = 0; + spm->is_user_buf_filled = false; + spm->has_user_buf = true; + spm_mgr->spm_cntr->are_users_buf_filled = false; + spm_mgr->spm_cntr->have_users_buf_cnt++; + } +out: + mutex_unlock(&spm_mgr->spm_cntr->spm_worker_mutex); + return ret; +} + +static int spm_wait_for_fill_awake(struct amdgpu_spm_cntr *spm_cntr, + struct drm_amdgpu_spm_args *user_spm_data) +{ + int ret = 0; + + long timeout = msecs_to_jiffies(user_spm_data->timeout); + long start_jiffies = jiffies; + + ret = wait_event_interruptible_timeout(spm_cntr->spm_buf_wq, + (READ_ONCE(spm_cntr->are_users_buf_filled) == true), + timeout); + + switch (ret) { + case -ERESTARTSYS: + /* 
Subtract elapsed time from timeout so we wait that much + * less when the call gets restarted. + */ + timeout -= (jiffies - start_jiffies); + if (timeout <= 0) { + ret = -ETIME; + timeout = 0; + pr_debug("[%s] interrupted by signal\n", __func__); + } + break; + + case 0: + default: + timeout = ret; + ret = 0; + break; + } + user_spm_data->timeout = jiffies_to_msecs(timeout); + + return ret; +} + +static int amdgpu_set_dest_buffer(struct amdgpu_spm_mgr *spm_mgr, void __user *data) +{ + struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr); + struct drm_amdgpu_spm_args user_spm_data, *user_spm_ptr; + struct amdgpu_spm_cntr *spm_cntr; + bool need_schedule = false; + unsigned long flags; + u32 ubufsize; + int ret = 0; + int inst; + + dev_dbg(adev->dev, "SPM start to set new destination buffer."); + mutex_lock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex)); + spm_cntr = spm_mgr->spm_cntr; + if (spm_cntr == NULL) { + ret = -EINVAL; + goto out; + } + + user_spm_ptr = (struct drm_amdgpu_spm_args *) data; + ubufsize = user_spm_ptr->buf_size / spm_cntr->spm_use_cnt; + ubufsize = rounddown(ubufsize, 32); + + if (ubufsize <= sizeof(struct drm_amdgpu_spm_buffer_header)) { + ret = -EINVAL; + goto out; + } + + memcpy(&user_spm_data, user_spm_ptr, sizeof(user_spm_data)); + user_spm_data.buf_size = ubufsize; + + if (user_spm_data.timeout && spm_cntr->have_users_buf_cnt && + !READ_ONCE(spm_cntr->are_users_buf_filled)) { + dev_dbg(adev->dev, "SPM waiting for fill awake, timeout = %d ms.", + user_spm_data.timeout); + ret = spm_wait_for_fill_awake(spm_cntr, &user_spm_data); + if (ret == -ETIME) { + /* Copy (partial) data to user buffer after a timeout */ + schedule_work(&spm_mgr->spm_work); + flush_work(&spm_mgr->spm_work); + /* This is not an error */ + ret = 0; + } else if (ret) { + /* handle other errors normally, including -ERESTARTSYS */ + goto out; + } + } else if (!user_spm_data.timeout && spm_cntr->have_users_buf_cnt) { + /* Copy (partial) data to user buffer */ + 
schedule_work(&spm_mgr->spm_work); + flush_work(&spm_mgr->spm_work); + } + + user_spm_ptr->bytes_copied = 0; + user_spm_ptr->has_data_loss = 0; + for_each_inst(inst, AMDGPU_XCC_MASK(adev)) { + struct amdgpu_spm_base *spm = &(spm_cntr->spm[inst]); + + if (spm->has_user_buf || user_spm_data.dest_buf) { + /* Get info about filled space in previous output buffer. + * Setup new dest buf if provided. + */ + ret = spm_update_dest_info(spm_mgr, inst, &user_spm_data, user_spm_ptr); + if (ret) + goto out; + } + + if (user_spm_data.dest_buf) { + /* Start SPM if necessary*/ + if (spm->is_spm_started == false) { + amdgpu_rlc_spm_cntl(adev, inst, 1); + spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags); + spm->is_spm_started = true; + /* amdgpu_rlc_spm_cntl() will reset SPM and + * wptr will become 0, adjust rptr accordingly. + */ + spm->ring_rptr = 0; + spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags); + } else { + /* If SPM was already started, there may already + * be data in the ring-buffer that needs to be read. + */ + need_schedule = true; + } + user_spm_data.dest_buf += ubufsize; + } else { + amdgpu_rlc_spm_cntl(adev, inst, 0); + spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags); + spm->is_spm_started = false; + /* amdgpu_rlc_spm_cntl() will reset SPM and wptr will become 0. 
+ * Adjust rptr accordingly + */ + spm->ring_rptr = 0; + spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags); + } + } + +out: + mutex_unlock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex)); + if (need_schedule) + schedule_work(&spm_mgr->spm_work); + + dev_dbg(adev->dev, "SPM finish to set new destination buffer, ret = %d.", ret); + return ret; +} + int amdgpu_spm_ioctl(struct drm_device *dev, void __user *data, struct drm_file *filp) { @@ -180,6 +394,9 @@ int amdgpu_spm_ioctl(struct drm_device *dev, void __user *data, case AMDGPU_SPM_OP_ACQUIRE: return amdgpu_spm_acquire(spm_mgr, filp); + case AMDGPU_SPM_OP_SET_DEST_BUF: + return amdgpu_set_dest_buffer(spm_mgr, data); + default: dev_err(adev->dev, "Invalid option: %i\n", args->op); return -EINVAL; @@ -203,6 +420,7 @@ void amdgpu_spm_mgr_fini(struct amdgpu_spm_mgr *spm_mgr) void amdgpu_spm_interrupt(struct amdgpu_device *adev, int xcc_id) { uint8_t xcp_id; + unsigned long flags; struct amdgpu_spm_mgr *spm_mgr; xcp_id = adev->xcp_mgr ? @@ -215,5 +433,8 @@ void amdgpu_spm_interrupt(struct amdgpu_device *adev, int xcc_id) return; } - /* TODO */ + spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags); + if (spm_mgr->spm_cntr && spm_mgr->spm_cntr->spm[xcc_id].is_spm_started) + schedule_work(&spm_mgr->spm_work); + spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h index ee4f03e2f6cf..075ad7eaad01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h @@ -31,6 +31,7 @@ struct amdgpu_spm_mgr { /* spm data */ struct amdgpu_spm_cntr *spm_cntr; struct work_struct spm_work; + spinlock_t spm_irq_lock; }; int amdgpu_spm_ioctl(struct drm_device *dev, void __user *data, -- 2.34.1
