to implement AMDGPU_SPM_OP_ACQUIRE, which grants a process exclusive
access to the SPM hardware on its XCP partition and allocates the
per-XCC ring buffers used for streaming performance counter data.
Data structures:
struct amdgpu_spm_base (amdgpu_spm.h):
Holds the per-XCC ring buffer state: GTT BO handle (spm_obj),
GPU virtual address (gpu_addr), kernel virtual address (cpu_addr),
and effective ring size (ring_size).
struct amdgpu_spm_cntr (amdgpu_spm.h):
Contains an array of amdgpu_spm_base[MAX_XCP] (one per XCC) and a
spm_worker_mutex to serialize worker operations. Allocated on first
ACQUIRE and freed on RELEASE.
struct amdgpu_spm_mgr (amdgpu_spm.h):
Extended with lead_thread (the acquiring process's thread group
leader), spm_cntr pointer, and spm_work work_struct for the deferred
ring buffer drain worker.
ACQUIRE flow (amdgpu_spm_acquire):
1. Takes the per-XCP prof_xcp_mgr->mutex to serialize concurrent
ACQUIRE attempts.
2. Returns -EBUSY if spm_cntr is already allocated (another process
holds SPM).
3. Allocates spm_cntr via kzalloc and records current->group_leader
as lead_thread.
4. For each XCC in AMDGPU_XCC_MASK(adev), calls _amdgpu_spm_acquire():
- Allocates a 4 MiB GTT ring buffer via amdgpu_bo_alloc_gtt_mem().
- Programs the ring buffer into the RLC hardware via
amdgpu_rlc_spm_acquire(), which also reserves a VMID for the
caller's VM.
- Subtracts 0x20 bytes (8 DWORDs) from ring_size to exclude the
hardware-defined metadata area at the end of the buffer.
- On failure, frees the GTT BO and clears the spm_base entry.
5. Initializes spm_worker_mutex and INIT_WORK for amdgpu_spm_work.
6. Records the drm_file pointer in spm_mgr->file.
7. On any per-XCC failure, rolls back all already-acquired XCCs via
_amdgpu_spm_release() (stub, TODO) and frees spm_cntr.
amdgpu_spm_work (work_struct handler):
Attaches the lead_thread's mm_struct via kthread_use_mm() to enable
user-space copy operations, then detaches and releases the mm. The
actual ring buffer drain to user space is a TODO for a later patch.
Three new navigation macros are added in amdgpu_profiler.h:
- to_prof_xcp_mgr(x, y): container_of from a member y to
amdgpu_profiler_xcp_mgr.
- xcp_to_prof_mgr(x, y): container_of from prof_xcp_mgr[] element to
amdgpu_profiler_mgr.
- mgr_to_adev(x, y): compound statement combining the two above to
reach amdgpu_device from any embedded manager pointer.
AMDGPU_XCC_MASK(adev) is added using GENMASK() to safely produce a
bitmask of active XCC instances from NUM_XCC(adev->gfx.xcc_mask).
In amdgpu_spm_ioctl(), the per-XCP spm_mgr is now resolved from fpriv
using AMDGPU_XCP_ID() before dispatching to sub-operation handlers.
Signed-off-by: James Zhu <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h | 13 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c | 114 ++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h | 17 +++
3 files changed, 143 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
index ea62a4dee364..587adadaedb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_profiler.h
@@ -27,6 +27,7 @@
#include "amdgpu_spm.h"
+#define AMDGPU_XCC_MASK(adev) GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0)
#define AMDGPU_XCP_ID(x) (x == AMDGPU_XCP_NO_PARTITION ? 0 : x)
#define fpriv_to_prof_mgr(fpriv) (&(fpriv)->userq_mgr.adev->prof_mgr)
#define fpriv_to_adev(fpriv) ((fpriv)->userq_mgr.adev)
@@ -34,6 +35,18 @@
#define prof_mgr_to_adev(x) \
container_of(x, struct amdgpu_device, prof_mgr)
+#define to_prof_xcp_mgr(x, y) \
+ container_of(x, struct amdgpu_profiler_xcp_mgr, y)
+
+#define xcp_to_prof_mgr(x, y) \
+ container_of(x, struct amdgpu_profiler_mgr, y)
+
+#define mgr_to_adev(x, y) \
+({ struct amdgpu_profiler_xcp_mgr *prof_xcp_mgr = to_prof_xcp_mgr(x, y); \
+ struct amdgpu_profiler_mgr *prof_mgr = \
+ xcp_to_prof_mgr(prof_xcp_mgr, prof_xcp_mgr[prof_xcp_mgr->xcp_id]);\
+ prof_mgr_to_adev(prof_mgr); })
+
struct amdgpu_profiler_xcp_mgr {
struct mutex mutex;
uint32_t xcp_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
index 6ff88dfabf1c..e58b89ccd83f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c
@@ -30,10 +30,28 @@
*/
static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp);
+static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp);
+
+static void amdgpu_spm_work(struct work_struct *work)
+{
+ struct amdgpu_spm_mgr *spm_mgr = container_of(work, struct amdgpu_spm_mgr, spm_work);
+ struct mm_struct *mm = NULL;
+
+ mm = get_task_mm(spm_mgr->lead_thread);
+ if (mm) {
+ kthread_use_mm(mm);
+ { /* attach mm */
+ /* TODO: dump spm ring buffer to user buffer */
+ } /* detach mm */
+ kthread_unuse_mm(mm);
+ /* release the mm structure */
+ mmput(mm);
+ }
+}
static void amdgpu_spm_init_device(struct amdgpu_spm_mgr *spm_mgr)
{
- /* TODO */
+ spm_mgr->spm_cntr = NULL;
}
static void amdgpu_spm_release_device(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
@@ -41,6 +59,94 @@ static void amdgpu_spm_release_device(struct amdgpu_spm_mgr *spm_mgr, struct drm
amdgpu_spm_release(spm_mgr, filp);
}
+static int _amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr);
+ struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]);
+ int ret = 0;
+
+ /* allocate 4M spm ring buffer */
+ spm->ring_size = 4 * 1024 * 1024;
+
+ ret = amdgpu_bo_alloc_gtt_mem(adev,
+ spm->ring_size, &spm->spm_obj,
+ &spm->gpu_addr, (void *)&spm->cpu_addr,
+ false, false);
+
+ if (ret)
+ goto out;
+
+ ret = amdgpu_rlc_spm_acquire(adev, inst, drm_priv_to_vm(filp),
+ spm->gpu_addr, spm->ring_size);
+ if (ret)
+ goto rlc_spm_acquire_failure;
+
+ /*
+ * By definition, the last 8 DWs of the buffer are not part of the rings
+ * and are instead part of the Meta data area.
+ */
+ spm->ring_size -= 0x20;
+
+ goto out;
+
+rlc_spm_acquire_failure:
+ amdgpu_bo_free_gtt_mem(adev, &spm->spm_obj);
+ memset(spm, 0, sizeof(*spm));
+out:
+ return ret;
+}
+
+static int amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr);
+ int ret = 0;
+ int inst;
+
+ mutex_lock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+
+ if (spm_mgr->spm_cntr) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ spm_mgr->spm_cntr = kzalloc(sizeof(struct amdgpu_spm_cntr), GFP_KERNEL);
+ if (!spm_mgr->spm_cntr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ spm_mgr->lead_thread = current->group_leader;
+
+ for_each_inst(inst, AMDGPU_XCC_MASK(adev)) {
+ ret = _amdgpu_spm_acquire(spm_mgr, inst, filp);
+ if (ret)
+ goto acquire_spm_failure;
+ }
+
+ mutex_init(&spm_mgr->spm_cntr->spm_worker_mutex);
+
+ INIT_WORK(&spm_mgr->spm_work, amdgpu_spm_work);
+
+ spm_mgr->file = filp;
+
+ goto out;
+
+acquire_spm_failure:
+ for_each_inst(inst, AMDGPU_XCC_MASK(adev))
+ _amdgpu_spm_release(spm_mgr, inst, filp);
+ kfree(spm_mgr->spm_cntr);
+ spm_mgr->spm_cntr = NULL;
+
+out:
+ mutex_unlock(&(to_prof_xcp_mgr(spm_mgr, spm_mgr)->mutex));
+ return ret;
+}
+
+static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp)
+{
+ /* TODO: */
+
+}
+
static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp)
{
/* TODO */
@@ -52,9 +158,15 @@ int amdgpu_spm_ioctl(struct drm_device *dev, void *data,
{
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_device *adev = fpriv_to_adev(fpriv);
+ struct amdgpu_profiler_mgr *prof_mgr = fpriv_to_prof_mgr(fpriv);
struct drm_amdgpu_spm_args *args = data;
+ struct amdgpu_spm_mgr *spm_mgr =
+ &(prof_mgr->prof_xcp_mgr[AMDGPU_XCP_ID(fpriv->xcp_id)].spm_mgr);
switch (args->op) {
+ case AMDGPU_SPM_OP_ACQUIRE:
+ return amdgpu_spm_acquire(spm_mgr, filp);
+
default:
dev_dbg(adev->dev, "Invalid option: %i\n", args->op);
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
index dc55d2a8f016..9db89fd6154d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h
@@ -24,8 +24,25 @@
#ifndef AMDGPU_SPM_H_
#define AMDGPU_SPM_H_
+struct amdgpu_spm_base {
+ u64 gpu_addr;
+ u32 ring_size;
+ u32 *cpu_addr;
+ void *spm_obj;
+};
+struct amdgpu_spm_cntr {
+ struct amdgpu_spm_base spm[MAX_XCP];
+ struct mutex spm_worker_mutex;
+};
+
struct amdgpu_spm_mgr {
struct drm_file *file;
+
+ struct task_struct *lead_thread;
+
+ /* spm data */
+ struct amdgpu_spm_cntr *spm_cntr;
+ struct work_struct spm_work;
};
int amdgpu_spm_ioctl(struct drm_device *dev, void *data,
--
2.34.1