On Fri, Aug 22, 2025 at 6:13 AM Jesse.Zhang <jesse.zh...@amd.com> wrote:
>
> From: Alex Deucher <alexander.deuc...@amd.com>
>
> Use the suspend and resume API rather than the remove queue
> and add queue API.  The former just preempts the queue
> while the latter removes it from the scheduler completely.
> There is no need to do that, we only need preemption
> in this case.
>
> v2: replace queue_active with queue state
> v3: set the suspend_fence_addr
> v4: allocate another per queue buffer for the suspend fence, and set the
>     sequence number.
>     also wait for the suspend fence. (Alex)
>
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  2 +
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 96 ++++++++++++++++++++++
>  2 files changed, 98 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index 5111d7dce86f..d58cf48b456b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -65,6 +65,8 @@ struct amdgpu_usermode_queue {
>         struct dma_fence        *last_fence;
>         u32                     xcp_id;
>         int                     priority;
> +       struct amdgpu_userq_obj suspend_fence;
> +       uint64_t                suspend_fence_seq;
>  };
>
>  struct amdgpu_userq_funcs {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index d6f50b13e2ba..ce5ac8c2f7e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -28,6 +28,8 @@
>
>  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
> +#define AMDGPU_USERQ_SUSPEND_FENCE_SZ PAGE_SIZE
> +#define AMDGPU_USERQ_FENCE_TIMEOUT (1000000000)
>
>  static int
>  mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
> @@ -347,9 +349,103 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
>         amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
>  }
>
> +static int mes_userq_wait_fence(struct amdgpu_userq_mgr *uq_mgr,
> +                               struct amdgpu_userq_obj *fence_obj,
> +                               uint64_t fence_value,
> +                               uint64_t timeout)
> +{
> +       /* This would typically involve polling the fence value in memory
> +        * until it matches the expected value or timeout occurs
> +        */
> +       uint64_t start_time = ktime_get_ns();
> +       uint64_t *fence_addr = fence_obj->cpu_ptr;
> +
> +       while (*fence_addr != fence_value) {
> +               if (ktime_get_ns() - start_time > timeout)
> +                       return -ETIMEDOUT;
> +               udelay(10);
> +       }
> +       return 0;
> +}
This can be simplified:

signed long amdgpu_userq_fence_wait_polling(uint64_t *fence_addr,
                                            uint64_t wait_seq,
                                            signed long timeout)
{
        while ((int64_t)(wait_seq - *fence_addr) > 0 && timeout > 0) {
                udelay(2);
                timeout -= 2;
        }
        return timeout > 0 ? timeout : 0;
}

> +
> +static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
> +                            struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       struct mes_suspend_gang_input queue_input;
> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> +       int r;
> +
> +       if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
> +               return 0;
> +       if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)
> +               return 0;
> +
> +       /* Allocate suspend fence buffer if not already allocated */
> +       if (!queue->suspend_fence.obj) {
> +               r = amdgpu_userq_create_object(uq_mgr, &queue->suspend_fence,
> +                                              AMDGPU_USERQ_SUSPEND_FENCE_SZ);
> +               if (r) {
> +                       DRM_ERROR("Failed to allocate suspend fence buffer\n");
> +                       return r;
> +               }
> +               queue->suspend_fence_seq = 0;
> +       }

We only need a little bit of memory.  It's probably more efficient to
just grab a wb slot.  Also, this current code leaks memory because we
never free it.  Something like:

u64 fence_gpu_addr;
u64 *fence_ptr;
u32 fence_offset;

r = amdgpu_device_wb_get(adev, &fence_offset);
if (r)
        return r;

fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4);
fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
*fence_ptr = 0;

then later when you are done with it at the end of the function:

amdgpu_device_wb_free(adev, fence_offset);

> +
> +       memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
> +       queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> +       queue_input.suspend_fence_addr = queue->suspend_fence.gpu_addr;
> +       queue_input.suspend_fence_value = ++queue->suspend_fence_seq;

queue_input.suspend_fence_addr = fence_gpu_addr;
queue_input.suspend_fence_value = 1;

Alex

> +       amdgpu_mes_lock(&adev->mes);
> +       r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
> +       amdgpu_mes_unlock(&adev->mes);
> +
> +       if (r) {
> +               DRM_ERROR("Failed to suspend gang: %d\n", r);
> +               return r;
> +       }
> +
> +       /* Wait for suspend fence to be signaled */
> +       r = mes_userq_wait_fence(uq_mgr, &queue->suspend_fence,
> +                                queue_input.suspend_fence_value,
> +                                AMDGPU_USERQ_FENCE_TIMEOUT);
> +       if (r) {
> +               DRM_ERROR("Suspend fence timeout\n");
> +               return r;
> +       }
> +
> +       return 0;
> +}
> +
> +static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
> +                            struct amdgpu_usermode_queue *queue)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       struct mes_resume_gang_input queue_input;
> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> +       int r;
> +
> +       if (queue->state == AMDGPU_USERQ_STATE_HUNG)
> +               return -EINVAL;
> +       if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
> +               return 0;
> +
> +       memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
> +       queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> +
> +       amdgpu_mes_lock(&adev->mes);
> +       r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
> +       amdgpu_mes_unlock(&adev->mes);
> +       if (r)
> +               dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
> +       return r;
> +}
> +
>  const struct amdgpu_userq_funcs userq_mes_funcs = {
>         .mqd_create = mes_userq_mqd_create,
>         .mqd_destroy = mes_userq_mqd_destroy,
>         .unmap = mes_userq_unmap,
>         .map = mes_userq_map,
> +       .preempt = mes_userq_preempt,
> +       .restore = mes_userq_restore,
>  };
> --
> 2.49.0
>
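For reference, a rough sketch of mes_userq_preempt() with both of the
above suggestions folded in -- untested, and assuming the simplified
amdgpu_userq_fence_wait_polling() helper from above; the timeout value
(adev->usec_timeout, in microseconds to match the udelay(2) decrement)
is just a placeholder:

static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
                             struct amdgpu_usermode_queue *queue)
{
        struct amdgpu_device *adev = uq_mgr->adev;
        struct mes_suspend_gang_input queue_input;
        struct amdgpu_userq_obj *ctx = &queue->fw_obj;
        signed long timeout = adev->usec_timeout; /* placeholder, in us */
        u64 fence_gpu_addr;
        u64 *fence_ptr;
        u32 fence_offset;
        int r;

        if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
                return 0;

        /* borrow a wb slot for the suspend fence; freed again below,
         * so there is no per-queue buffer to allocate or leak
         */
        r = amdgpu_device_wb_get(adev, &fence_offset);
        if (r)
                return r;

        fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4);
        fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
        *fence_ptr = 0;

        memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
        queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
        queue_input.suspend_fence_addr = fence_gpu_addr;
        queue_input.suspend_fence_value = 1;

        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
        amdgpu_mes_unlock(&adev->mes);
        if (r) {
                DRM_ERROR("Failed to suspend gang: %d\n", r);
                goto out;
        }

        /* poll the wb slot until the MES writes the fence value */
        timeout = amdgpu_userq_fence_wait_polling(fence_ptr, 1, timeout);
        if (timeout <= 0) {
                DRM_ERROR("Suspend fence timeout\n");
                r = -ETIMEDOUT;
        }

out:
        amdgpu_device_wb_free(adev, fence_offset);
        return r;
}

The wb slot is released on every exit path, which avoids the leak noted
above, and the redundant AMDGPU_USERQ_STATE_PREEMPTED check (dead code
after the != MAPPED early return) is dropped.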