Implement AMDGPU_SPM_OP_SET_DEST_BUF, which registers a user-space
destination buffer for SPM data streaming, optionally waits for the
previous buffer to be filled, and starts or stops the SPM hardware
per XCC instance.
Data structure additions (amdgpu_spm.h):
- struct spm_user_buf: tracks the current user-space destination buffer
  per XCC: the adjusted user_addr (offset past the
  drm_amdgpu_spm_buffer_header) and the remaining ubufsize in bytes, as
  sketched below.
- amdgpu_spm_base extended with: ubuf (spm_user_buf), ring_rptr,
  size_copied, has_data_loss, has_user_buf (buffer registered),
  is_user_buf_filled (buffer full), is_spm_started (SPM HW running).
- amdgpu_spm_cntr extended with: spm_use_cnt (number of active XCC ring
  buffers), have_users_buf_cnt (number of XCCs with a registered user
  buffer), are_users_buf_filled (true when all registered buffers are
  full), spm_buf_wq (wait queue head for blocking on buffer fill).
- amdgpu_spm_mgr extended with: spm_irq_lock (spinlock protecting
  is_spm_started and ring_rptr against concurrent IRQ handler access).
SET_DEST_BUF flow (amdgpu_set_dest_buffer):
1. Validates spm_cntr is allocated and spm_use_cnt > 0.
2. Splits the caller-supplied buf_size evenly across all active XCCs
(spm_use_cnt), rounded down to 32-byte alignment. Returns -EINVAL
if the per-XCC slice cannot accommodate a drm_amdgpu_spm_buffer_header.
3. If timeout > 0 and a previous buffer is still being filled,
blocks on spm_buf_wq via wait_event_interruptible_timeout().
On -ERESTARTSYS the remaining timeout is computed and returned so
the syscall can be transparently restarted by user space. On
-ETIME (timeout expired), the work queue is flushed immediately
to capture any partial data, and the call returns success.
If timeout == 0 and a previous buffer exists, the work queue is
flushed immediately without waiting.
4. For each XCC in AMDGPU_XCC_MASK(adev), calls spm_update_dest_info():
- If a previous user buffer was registered (has_user_buf), writes
the drm_amdgpu_spm_buffer_header (SPM version, bytes_copied,
has_data_loss) to the reserved header slot just before ubuf.user_addr
via copy_to_user(), accumulates bytes_copied and has_data_loss into
the ioctl output args, and decrements have_users_buf_cnt.
- If a new dest_buf is provided, advances the internal user_addr
past the header, records ubufsize, resets size_copied, has_data_loss
and is_user_buf_filled, sets has_user_buf, and increments
have_users_buf_cnt.
5. If dest_buf != NULL and the XCC's SPM is not yet started:
calls amdgpu_rlc_spm_cntl(start), resets ring_rptr to 0 under
spm_irq_lock (since amdgpu_rlc_spm_cntl() resets wptr to 0),
and sets is_spm_started. If SPM was already running, sets
need_schedule to drain existing ring data after the mutex is
released.
6. If dest_buf == NULL: stops the SPM hardware via
amdgpu_rlc_spm_cntl(stop), clears is_spm_started and ring_rptr
under spm_irq_lock.
7. After releasing the XCP mutex, schedules the work queue if
need_schedule is set. (A user-space usage sketch follows this list.)
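For illustration, a minimal user-space double-buffering sketch follows.
DRM_IOCTL_AMDGPU_SPM, drm_fd, BUF_SZ, buf_a/buf_b, capturing and
consume() are placeholders (the uapi ioctl number is not part of this
patch); the op values and the drm_amdgpu_spm_args fields match what
the driver code dereferences:

	/* Hypothetical double-buffered capture loop (sketch only). */
	struct drm_amdgpu_spm_args args = {0};
	void *cur = buf_a, *next = buf_b, *tmp;

	args.op = AMDGPU_SPM_OP_ACQUIRE;
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_SPM, &args))
		err(1, "spm acquire");

	/* Register the first buffer; this starts the SPM hardware. */
	args.op = AMDGPU_SPM_OP_SET_DEST_BUF;
	args.dest_buf = (uint64_t)(uintptr_t)cur;
	args.buf_size = BUF_SZ;
	args.timeout = 0;	/* no previous buffer to wait for */
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_SPM, &args))
		err(1, "spm set dest buf");

	while (capturing) {
		/* Swap buffers, waiting up to 100 ms for the previous
		 * one to fill; on return, bytes_copied/has_data_loss
		 * describe the buffer being retired.
		 */
		args.dest_buf = (uint64_t)(uintptr_t)next;
		args.buf_size = BUF_SZ;
		args.timeout = 100;
		if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_SPM, &args))
			break;
		consume(cur, args.bytes_copied, args.has_data_loss);
		tmp = cur; cur = next; next = tmp;
	}

	/* dest_buf == NULL stops SPM and finalizes the last header. */
	args.dest_buf = 0;
	args.timeout = 0;
	ioctl(drm_fd, DRM_IOCTL_AMDGPU_SPM, &args);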
The amdgpu_spm_interrupt() stub is now implemented: it resolves the
xcp_id from the xcc_id using amdgpu_xcp_get_partition() (defaulting to
0 on non-partitioned devices), looks up the corresponding spm_mgr, and,
under spm_irq_lock, schedules the work queue if SPM is active on that
XCC. The expected pairing with the worker's wake-up is sketched below.
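The sleeper in spm_wait_for_fill_awake() relies on the worker to
publish are_users_buf_filled and wake spm_buf_wq. A sketch of that
expected completion path, inferred from the wait condition (the worker
itself comes from an earlier patch in the series and is not shown
here; filled_cnt is a placeholder for however the worker counts full
user buffers):

	mutex_lock(&spm_cntr->spm_worker_mutex);
	if (filled_cnt && filled_cnt == spm_cntr->have_users_buf_cnt) {
		/* Pairs with READ_ONCE() in spm_wait_for_fill_awake(). */
		WRITE_ONCE(spm_cntr->are_users_buf_filled, true);
		wake_up_interruptible(&spm_cntr->spm_buf_wq);
	}
	mutex_unlock(&spm_cntr->spm_worker_mutex);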
Signed-off-by: James Zhu <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c | 217 +++++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h | 18 ++
2 files changed, 233 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
index e58b89ccd83f..896a0fef576c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
@@ -29,6 +29,9 @@
* 0.1 - Initial revision
*/
+#define AMDGPU_SPM_MAJOR_VERSION 0
+#define AMDGPU_SPM_MINOR_VERSION 1
+
static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp);
static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst,
struct drm_file *filp);
@@ -120,12 +123,16 @@ static int amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *f
ret = _amdgpu_spm_acquire(spm_mgr, inst, filp);
if (ret)
goto acquire_spm_failure;
+ spm_mgr->spm_cntr->spm_use_cnt++;
}
+ spm_mgr->spm_cntr->have_users_buf_cnt = 0;
mutex_init(&spm_mgr->spm_cntr->spm_worker_mutex);
+ init_waitqueue_head(&spm_mgr->spm_cntr->spm_buf_wq);
INIT_WORK(&spm_mgr->spm_work, amdgpu_spm_work);
+ spin_lock_init(&spm_mgr->spm_irq_lock);
spm_mgr->file = filp;
goto out;
@@ -153,6 +160,197 @@ static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *f
return 0;
}
+static int spm_update_dest_info(struct amdgpu_spm_mgr *spm_mgr,
+ int inst, struct drm_amdgpu_spm_args *user_spm_data,
+ struct drm_amdgpu_spm_args *user_spm_ptr)
+{
+ struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]);
+ int ret = 0;
+
+ mutex_lock(&spm_mgr->spm_cntr->spm_worker_mutex);
+ if (spm->has_user_buf) {
+ struct drm_amdgpu_spm_buffer_header spm_header;
+ uint64_t __user *user_address;
+
+ user_spm_ptr->bytes_copied += spm->size_copied;
+ user_spm_ptr->has_data_loss += spm->has_data_loss;
+
+ memset(&spm_header, 0, sizeof(spm_header));
+ user_address = (uint64_t __user *)((uint64_t)spm->ubuf.user_addr - sizeof(spm_header));
+ spm_header.version = AMDGPU_SPM_MAJOR_VERSION << 24 |
+ AMDGPU_SPM_MINOR_VERSION;
+ spm_header.bytes_copied = spm->size_copied;
+ spm_header.has_data_loss = spm->has_data_loss;
+ spm->has_user_buf = false;
+ spm_mgr->spm_cntr->have_users_buf_cnt--;
+
+ ret = copy_to_user(user_address, &spm_header, sizeof(spm_header));
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+ if (user_spm_data->dest_buf) {
+ spm->ubuf.user_addr = (uint64_t __user *)user_spm_data->dest_buf;
+ spm->ubuf.ubufsize = user_spm_data->buf_size;
+ /* reserve space for drm_amdgpu_spm_buffer_header */
+ spm->ubuf.user_addr = (uint64_t __user *)((uint64_t)spm->ubuf.user_addr +
+ sizeof(struct drm_amdgpu_spm_buffer_header));
+ spm->ubuf.ubufsize -= sizeof(struct drm_amdgpu_spm_buffer_header);
+ spm->has_data_loss = 0;
+ spm->size_copied = 0;
+ spm->is_user_buf_filled = false;
+ spm->has_user_buf = true;
+ spm_mgr->spm_cntr->are_users_buf_filled = false;
+ spm_mgr->spm_cntr->have_users_buf_cnt++;
+ }
+out:
+ mutex_unlock(&spm_mgr->spm_cntr->spm_worker_mutex);
+ return ret;
+}
+
+static int spm_wait_for_fill_awake(struct amdgpu_spm_cntr *spm_cntr,
+ struct drm_amdgpu_spm_args *user_spm_data)
+{
+ int ret = 0;
+
+ long timeout = msecs_to_jiffies(user_spm_data->timeout);
+ unsigned long start_jiffies = jiffies;
+
+ ret = wait_event_interruptible_timeout(spm_cntr->spm_buf_wq,
+ (READ_ONCE(spm_cntr->are_users_buf_filled) == true),
+ timeout);
+
+ switch (ret) {
+ case -ERESTARTSYS:
+ /* Subtract elapsed time from timeout so we wait that much
+ * less when the call gets restarted.
+ */
+ timeout -= (jiffies - start_jiffies);
+ if (timeout <= 0) {
+ ret = -ETIME;
+ timeout = 0;
+ pr_debug("[%s] interrupted by signal\n", __func__);
+ }
+ break;
+
+ case 0:
+ default:
+ timeout = ret;
+ ret = 0;
+ break;
+ }
+ user_spm_data->timeout = jiffies_to_msecs(timeout);
+
+ return ret;
+}
+
+static int amdgpu_set_dest_buffer(struct amdgpu_spm_mgr *spm_mgr, void *data)
+{
+ struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr);
+ struct drm_amdgpu_spm_args user_spm_data, *user_spm_ptr;
+ struct amdgpu_spm_cntr *spm_cntr;
+ bool need_schedule = false;
+ unsigned long flags;
+ u32 ubufsize;
+ int ret = 0;
+ int inst;
+
+ dev_dbg(adev->dev, "SPM start to set new destination buffer.");
+ mutex_lock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+ spm_cntr = spm_mgr->spm_cntr;
+ if (spm_cntr == NULL ||
+ !spm_cntr->spm_use_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ user_spm_ptr = (struct drm_amdgpu_spm_args *) data;
+ ubufsize = user_spm_ptr->buf_size / spm_cntr->spm_use_cnt;
+ ubufsize = rounddown(ubufsize, 32);
+
+ if (ubufsize <= sizeof(struct drm_amdgpu_spm_buffer_header)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&user_spm_data, user_spm_ptr, sizeof(user_spm_data));
+ user_spm_data.buf_size = ubufsize;
+
+ if (user_spm_data.timeout && spm_cntr->have_users_buf_cnt &&
+ !READ_ONCE(spm_cntr->are_users_buf_filled)) {
+ dev_dbg(adev->dev, "SPM waiting for fill awake, timeout = %d
ms.",
+ user_spm_data.timeout);
+ ret = spm_wait_for_fill_awake(spm_cntr, &user_spm_data);
+ if (ret == -ETIME) {
+ /* Copy (partial) data to user buffer after a timeout */
+ schedule_work(&spm_mgr->spm_work);
+ flush_work(&spm_mgr->spm_work);
+ /* This is not an error */
+ ret = 0;
+ } else if (ret) {
+ /* handle other errors normally, including -ERESTARTSYS */
+ goto out;
+ }
+ } else if (!user_spm_data.timeout && spm_cntr->have_users_buf_cnt) {
+ /* Copy (partial) data to user buffer */
+ schedule_work(&spm_mgr->spm_work);
+ flush_work(&spm_mgr->spm_work);
+ }
+
+ user_spm_ptr->bytes_copied = 0;
+ user_spm_ptr->has_data_loss = 0;
+ for_each_inst(inst, AMDGPU_XCC_MASK(adev)) {
+ struct amdgpu_spm_base *spm = &(spm_cntr->spm[inst]);
+
+ if (spm->has_user_buf || user_spm_data.dest_buf) {
+ /* Get info about filled space in previous output buffer.
+ * Setup new dest buf if provided.
+ */
+ ret = spm_update_dest_info(spm_mgr, inst, &user_spm_data, user_spm_ptr);
+ if (ret)
+ goto out;
+ }
+
+ if (user_spm_data.dest_buf) {
+ /* Start SPM if necessary */
+ if (!spm->is_spm_started) {
+ amdgpu_rlc_spm_cntl(adev, inst, 1);
+ spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags);
+ spm->is_spm_started = true;
+ /* amdgpu_rlc_spm_cntl() will reset SPM and
+ * wptr will become 0, adjust rptr accordingly.
+ */
+ spm->ring_rptr = 0;
+ spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags);
+ } else {
+ /* If SPM was already started, there may already
+ * be data in the ring-buffer that needs to be read.
+ */
+ need_schedule = true;
+ }
+ user_spm_data.dest_buf += ubufsize;
+ } else {
+ amdgpu_rlc_spm_cntl(adev, inst, 0);
+ spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags);
+ spm->is_spm_started = false;
+ /* amdgpu_rlc_spm_cntl() will reset SPM and wptr will become 0.
+ * Adjust rptr accordingly.
+ */
+ spm->ring_rptr = 0;
+ spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags);
+ }
+ }
+
+out:
+ mutex_unlock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+ if (need_schedule)
+ schedule_work(&spm_mgr->spm_work);
+
+ dev_dbg(adev->dev, "SPM finish to set new destination buffer, ret =
%d.", ret);
+ return ret;
+}
+
int amdgpu_spm_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@@ -167,10 +365,13 @@ int amdgpu_spm_ioctl(struct drm_device *dev, void *data,
case AMDGPU_SPM_OP_ACQUIRE:
return amdgpu_spm_acquire(spm_mgr, filp);
+ case AMDGPU_SPM_OP_SET_DEST_BUF:
+ return amdgpu_set_dest_buffer(spm_mgr, data);
+
default:
dev_dbg(adev->dev, "Invalid option: %i\n", args->op);
- return -EINVAL;
}
+ return -EINVAL;
}
int amdgpu_spm_mgr_init(struct amdgpu_spm_mgr *spm_mgr)
@@ -189,5 +390,17 @@ void amdgpu_spm_mgr_fini(struct amdgpu_spm_mgr *spm_mgr)
void amdgpu_spm_interrupt(struct amdgpu_device *adev, int xcc_id)
{
- /* TODO */
+ uint8_t xcp_id;
+ unsigned long flags;
+ struct amdgpu_spm_mgr *spm_mgr;
+
+ xcp_id = adev->xcp_mgr ?
+ fls(amdgpu_xcp_get_partition(adev->xcp_mgr, AMDGPU_XCP_GFX, xcc_id)) - 1 : 0;
+
+ spm_mgr = &(adev->prof_mgr.prof_xcp_mgr[xcp_id].spm_mgr);
+
+ spin_lock_irqsave(&spm_mgr->spm_irq_lock, flags);
+ if (spm_mgr->spm_cntr && spm_mgr->spm_cntr->spm[xcc_id].is_spm_started)
+ schedule_work(&spm_mgr->spm_work);
+ spin_unlock_irqrestore(&spm_mgr->spm_irq_lock, flags);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
index 9db89fd6154d..5eed6aa6482a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
@@ -24,15 +24,32 @@
#ifndef AMDGPU_SPM_H_
#define AMDGPU_SPM_H_
+struct spm_user_buf {
+ uint64_t __user *user_addr;
+ u32 ubufsize;
+};
+
struct amdgpu_spm_base {
+ struct spm_user_buf ubuf;
u64 gpu_addr;
u32 ring_size;
+ u32 ring_rptr;
+ u32 size_copied;
+ u32 has_data_loss;
u32 *cpu_addr;
void *spm_obj;
+ bool has_user_buf;
+ bool is_user_buf_filled;
+ bool is_spm_started;
};
+
struct amdgpu_spm_cntr {
struct amdgpu_spm_base spm[MAX_XCP];
+ int spm_use_cnt;
struct mutex spm_worker_mutex;
+ wait_queue_head_t spm_buf_wq;
+ u32 have_users_buf_cnt;
+ bool are_users_buf_filled;
};
struct amdgpu_spm_mgr {
@@ -43,6 +60,7 @@ struct amdgpu_spm_mgr {
/* spm data */
struct amdgpu_spm_cntr *spm_cntr;
struct work_struct spm_work;
+ spinlock_t spm_irq_lock;
};
int amdgpu_spm_ioctl(struct drm_device *dev, void *data,
--
2.34.1