Reserve space to avoid page faults and data loss.

A hardware bug on GFX IP versions earlier than 11.0.1 causes the RLC SPM hardware to write slightly beyond the end of the declared ring buffer when the ring wraps around. This can corrupt adjacent memory and cause page faults or silent data loss.
The workaround reserves an extra 0x400 bytes (1 KiB) of guard space immediately after the ring buffer's declared end, fills it with a known sentinel value (SPM_OVERFLOW_MAGIC = 0xBEEFABCDDEADABCDULL), and recovers any counter data that spilled into this region before it is overwritten by the next wrap-around. The overflow size varies from wrap to wrap, the spilled data is valid counter data, and after an overflow the hardware's wrap-around write starts from cpu_addr + overflow_size + 0x20 instead of cpu_addr + 0x20.

Implementation:

amdgpu_spm_init_device(): detects affected hardware via ip_versions[GC_HWIP][0] < IP_VERSION(12, 0, 0) and sets spm_overflow_reserved = 0x400. The field stays zero on unaffected hardware (where the version check fails), making the entire workaround a no-op there.

_amdgpu_spm_acquire(): before calling amdgpu_rlc_spm_acquire(), subtracts spm_overflow_reserved from ring_size so the hardware is told a smaller ring size, leaving the guard region beyond the hardware's declared ring end. After the 0x20-byte metadata subtraction, calls amdgpu_spm_preset() to fill the guard region with SPM_OVERFLOW_MAGIC.

amdgpu_spm_preset(spm, size): writes SPM_OVERFLOW_MAGIC into the guard region starting at cpu_addr + ring_size + 0x20 for 'size' bytes. Writes are at a 0x20-byte (32-byte) stride, matching the SPM hardware's fill granularity.

amdgpu_spm_read_ring_buffer() wrap-around path: when wptr < rptr (the ring has wrapped), scans the guard region in 0x20-byte steps to measure how many bytes of overflow data the hardware wrote (overflow_size), stopping at the first 64-bit word that still contains SPM_OVERFLOW_MAGIC (i.e. was never written). If overflow_size > 0, logs a debug message and copies the overflow data back to ring_start (cpu_addr + 0x20) via memcpy() so it is picked up by the normal copy processing. After the ring drain completes, calls amdgpu_spm_preset() to re-arm the guard region with SPM_OVERFLOW_MAGIC for the next wrap-around.
Signed-off-by: James Zhu <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c | 42 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h | 2 ++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c index 9f0d1f688d5e..9b7bb15a3785 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.c @@ -32,9 +32,24 @@ #define AMDGPU_SPM_MAJOR_VERSION 0 #define AMDGPU_SPM_MINOR_VERSION 1 +/* used to detect SPM overflow */ +#define SPM_OVERFLOW_MAGIC 0xBEEFABCDDEADABCDULL + static int amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, struct drm_file *filp); static void _amdgpu_spm_release(struct amdgpu_spm_mgr *spm_mgr, int inst, struct drm_file *filp); +static void amdgpu_spm_preset(struct amdgpu_spm_base *spm, u32 size) +{ + uint64_t *overflow_ptr, *overflow_end_ptr; + + overflow_ptr = (uint64_t *)((uint64_t)spm->cpu_addr + + spm->ring_size + 0x20); + overflow_end_ptr = overflow_ptr + (size >> 3); + /* SPM data filling is 0x20 alignment */ + for ( ; overflow_ptr < overflow_end_ptr; overflow_ptr += 4) + *overflow_ptr = SPM_OVERFLOW_MAGIC; +} + static int amdgpu_spm_data_copy(struct amdgpu_spm_mgr *spm_mgr, u32 size_to_copy, int inst) { struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]); @@ -79,6 +94,7 @@ static int amdgpu_spm_read_ring_buffer(struct amdgpu_spm_mgr *spm_mgr, int inst) { struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr); struct amdgpu_spm_base *spm = &(spm_mgr->spm_cntr->spm[inst]); + u32 overflow_size = 0; u32 size_to_copy; int ret = 0; u32 ring_wptr; @@ -110,6 +126,22 @@ static int amdgpu_spm_read_ring_buffer(struct amdgpu_spm_mgr *spm_mgr, int inst) size_to_copy = ring_wptr - spm->ring_rptr; ret = amdgpu_spm_data_copy(spm_mgr, size_to_copy, inst); } else { + uint64_t *ring_start, *ring_end; + + ring_start = (uint64_t *)((uint64_t)spm->cpu_addr + 0x20); + ring_end = ring_start + (spm->ring_size >> 3); + 
for ( ; overflow_size < spm_mgr->spm_overflow_reserved; overflow_size += 0x20) { + uint64_t *overflow_ptr = ring_end + (overflow_size >> 3); + + if (*overflow_ptr == SPM_OVERFLOW_MAGIC) + break; + } + if (overflow_size) + dev_dbg(adev->dev, + "SPM ring buffer overflow size 0x%x", overflow_size); + /* move overflow counters into ring buffer to avoid data loss */ + memcpy(ring_start, ring_end, overflow_size); + size_to_copy = spm->ring_size - spm->ring_rptr; ret = amdgpu_spm_data_copy(spm_mgr, size_to_copy, inst); @@ -128,6 +160,7 @@ static int amdgpu_spm_read_ring_buffer(struct amdgpu_spm_mgr *spm_mgr, int inst) } exit: + amdgpu_spm_preset(spm, overflow_size); amdgpu_rlc_spm_set_rdptr(adev, inst, spm->ring_rptr); return ret; } @@ -169,6 +202,12 @@ static void amdgpu_spm_work(struct work_struct *work) static void amdgpu_spm_init_device(struct amdgpu_spm_mgr *spm_mgr) { + struct amdgpu_device *adev = mgr_to_adev(spm_mgr, spm_mgr); + + /* pre-gfx12 spm has a hardware bug to cause overflow */ + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(12, 0, 0)) + spm_mgr->spm_overflow_reserved = 0x400; + spm_mgr->spm_cntr = NULL; } @@ -194,6 +233,8 @@ static int _amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, int inst, struct if (ret) goto out; + /* reserve space to fix spm overflow */ + spm->ring_size -= spm_mgr->spm_overflow_reserved; ret = amdgpu_rlc_spm_acquire(adev, inst, drm_priv_to_vm(filp), spm->gpu_addr, spm->ring_size); if (ret) @@ -204,6 +245,7 @@ static int _amdgpu_spm_acquire(struct amdgpu_spm_mgr *spm_mgr, int inst, struct * and are instead part of the Meta data area. 
*/ spm->ring_size -= 0x20; + amdgpu_spm_preset(spm, spm_mgr->spm_overflow_reserved); goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h index 5eed6aa6482a..f00a4751643e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_spm.h @@ -61,6 +61,8 @@ struct amdgpu_spm_mgr { struct amdgpu_spm_cntr *spm_cntr; struct work_struct spm_work; spinlock_t spm_irq_lock; + /* reserve space to fix spm overflow */ + u32 spm_overflow_reserved; }; int amdgpu_spm_ioctl(struct drm_device *dev, void *data, -- 2.34.1
